Get file from SharePoint to copy into Azure blob storage
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
09-14-2022 03:00 AM
Hello Team,
I am trying to copy .xlsx files from SharePoint into Azure Blob Storage, using the following code:
# SharePoint connection settings. Each value is looked up through
# `app_config_client` (created outside this snippet) under the 'BIApp' label.
USERNAME = app_config_client.get_configuration_setting(
    key='BIAppConfig:SharepointUsername',
    label='BIApp',
).value
PASSWORD = app_config_client.get_configuration_setting(
    key='BIAppConfig:SharepointPassword',
    label='BIApp',
).value
SHAREPOINT_URL = app_config_client.get_configuration_setting(
    key='BIAppConfig:SharepointUrl',
    label='BIApp',
).value
SHAREPOINT_SITE = app_config_client.get_configuration_setting(
    key='BIAppConfig:SharepointSite',
    label='BIApp',
).value
# Prefix that is concatenated (without a separator) in front of folder names.
SHAREPOINT_DOC = app_config_client.get_configuration_setting(
    key='BIAppConfig:SharepointDocument',
    label='BIApp',
).value
class SharePoint:
    """Small helper around a SharePoint site for listing and fetching files.

    NOTE(review): `Office365`, `Site` and `Version` look like the SharePlum
    API -- confirm against the imports, which are not part of this snippet.
    Every public method authenticates again before doing its work.
    """

    def auth(self):
        """Log in and return the site handle (also stored on ``self.site``)."""
        cookies = Office365(
            SHAREPOINT_URL,
            username=USERNAME,
            password=PASSWORD,
        ).GetCookies()
        self.authcookie = cookies
        self.site = Site(
            SHAREPOINT_SITE,
            version=Version.v365,
            authcookie=cookies,
        )
        return self.site

    def connect_folder(self, folder_name):
        """Authenticate and return the folder object for ``folder_name``.

        The folder path is ``SHAREPOINT_DOC`` immediately followed by
        ``folder_name``; no separator is inserted between the two.
        """
        site = self.auth()
        self.auth_site = site
        self.sharepoint_dir = SHAREPOINT_DOC + folder_name
        self.folder = site.Folder(self.sharepoint_dir)
        return self.folder

    def download_file(self, file_name, folder_name):
        """Return whatever the folder's ``get_file`` yields for ``file_name``."""
        folder = self.connect_folder(folder_name)
        self._folder = folder
        return folder.get_file(file_name)

    def _get_files_list(self, folder_name):
        """Return the ``files`` listing of the folder ``folder_name``."""
        folder = self.connect_folder(folder_name)
        self._folder = folder
        return folder.files

    def download_files(self, folder_name):
        """Return the file listing of ``folder_name``.

        Despite its name, this method only lists the folder; it does not
        fetch any file content.
        """
        listing = self._get_files_list(folder_name)
        self._files_list = listing
        return listing
# Run parameters. The literal string 'None' (not the None object) is what the
# __main__ block treats as "parameter not set".
# 1st argument: SharePoint folder name.
folder_name = 'Evotec/Global Account Plan/Archive'
# 2nd argument: SharePoint file name.
file_name = 'Gold Sheet Evotec.xlsx'
# 3rd argument: SharePoint file name pattern.
file_name_pattern = 'Gold Sheet'

# Path of the local JSON config; the code that reads it is currently disabled.
## ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(os.path.abspath(''))
config_path = PurePath(ROOT_DIR, 'config')
## with open(config_path) as config_file:
##     config = json.load(config_file)
##     config = config['azure_storage']

### Storage account & path
AZURE_ACCOUNT_NAME = app_config_client.get_configuration_setting(
    key='BIAppConfig:storageAccountName',
    label='BIApp',
).value
AZURE_ACCESS_KEY = app_config_client.get_configuration_setting(
    key='BIAppConfig:storageAccountAccessKey',
    label='BIApp',
).value
CONTAINER_NAME = app_config_client.get_configuration_setting(
    key='BIAppConfig:salesforceContainerName',
    label='BIApp',
).value
AZURE_CONN_STR = f'DefaultEndpointsProtocol=https;AccountName={AZURE_ACCOUNT_NAME};AccountKey={AZURE_ACCESS_KEY};EndpointSuffix=core.windows.net'

# Key Vault client; TENANT_ID, CLIENT_ID, CLIENT_SECRET and KEYVAULT_URI are
# expected to be defined outside this snippet.
KV_CREDENTIAL = ClientSecretCredential(
    tenant_id=TENANT_ID,
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
)
KV_SECRETCLIENT = SecretClient(vault_url=KEYVAULT_URI, credential=KV_CREDENTIAL)
# functions used for azure storage
def upload_file_to_blob(file_obj, file_name, *, overwrite=False):
    """Upload ``file_obj`` to the configured container as blob ``file_name``.

    Args:
        file_obj: Content to upload; handed unchanged to
            ``BlobClient.upload_blob``.
        file_name: Name of the destination blob inside ``CONTAINER_NAME``.
        overwrite: Forwarded to ``upload_blob``. Pass True to replace a blob
            that already exists, e.g. when the copy job is run again. The
            default, False, is the value ``upload_blob`` uses when the
            argument is omitted, so existing callers behave as before.
    """
    blob = BlobClient.from_connection_string(
        conn_str=AZURE_CONN_STR,
        container_name=CONTAINER_NAME,
        blob_name=file_name,
        credential=AZURE_ACCESS_KEY,
    )
    blob.upload_blob(file_obj, overwrite=overwrite)
def get_file(file_n, folder):
    """Fetch ``file_n`` from the SharePoint ``folder`` and upload it as a blob.

    The blob keeps the SharePoint file name.
    """
    sharepoint = SharePoint()
    content = sharepoint.download_file(file_n, folder)
    upload_file_to_blob(content, file_n)
def get_files(folder):
    """Copy every file listed in the SharePoint ``folder`` to blob storage."""
    listing = SharePoint().download_files(folder)
    # Each listing entry is indexed by 'Name' to obtain the file name.
    for name in (entry['Name'] for entry in listing):
        get_file(name, folder)
def get_files_by_pattern(pattern, folder):
    """Copy the files in ``folder`` whose name matches ``pattern``.

    ``pattern`` is a regular expression applied with ``re.search``, so it may
    match anywhere in the file name.
    """
    for entry in SharePoint().download_files(folder):
        name = entry['Name']
        if not re.search(pattern, name):
            continue
        get_file(name, folder)
if __name__ == '__main__':
    # Dispatch on which parameter is set. The literal string 'None' (not the
    # None object) is the "not set" marker.
    if file_name != 'None':
        # An explicit file name wins: copy just that file.
        get_file(file_name, folder_name)
    elif file_name_pattern != 'None':
        # Fixed: this branch used to call the undefined name
        # `get_file_name_pattern` with the undefined variable `file`, which
        # would raise NameError. The pattern helper is `get_files_by_pattern`.
        get_files_by_pattern(file_name_pattern, folder_name)
    else:
        # Neither a file name nor a pattern: copy the whole folder.
        get_files(folder_name)
I am getting the following error:
AttributeError: 'SharePoint' object has no attribute 'download_file'
AttributeError Traceback (most recent call last)
<command-3758021352223721> in <cell line: 62>()
if __name__ == '__main__':
if file_name != 'None':
---> get_file(file_name, folder_name)
elif file_name_pattern != 'None':
get_file_name_pattern(file, folder_name)
<command-3758021352223721> in get_file(file_n, folder)
def get_file(file_n, folder):
---> file_obj = SharePoint().download_file(file_n, folder)
upload_file_to_blob(file_obj, file_n)
Please advise.
Regards
Rohit
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
09-15-2022 10:00 PM
Hi, SharePoint is not a supported data source, as per https://docs.databricks.com/data/data-sources/index.html.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
09-28-2022 12:10 AM
Hi @Rohit Kulkarni
Hope all is well! I just wanted to check in: were you able to resolve your issue? If so, would you be happy to share the solution or mark an answer as best? Otherwise, please let us know if you need more help.
We'd love to hear from you.
Thanks!