09-14-2022 03:00 AM
Hello Team,
I am trying to copy .xlsx files from SharePoint to Azure Blob Storage.
def _read_sharepoint_setting(key):
    """Return the value stored under ``key`` with label ``BIApp``."""
    setting = app_config_client.get_configuration_setting(key=key, label='BIApp')
    return setting.value


# SharePoint credentials and locations, all read through app_config_client.
USERNAME = _read_sharepoint_setting('BIAppConfig:SharepointUsername')
PASSWORD = _read_sharepoint_setting('BIAppConfig:SharepointPassword')
SHAREPOINT_URL = _read_sharepoint_setting('BIAppConfig:SharepointUrl')
SHAREPOINT_SITE = _read_sharepoint_setting('BIAppConfig:SharepointSite')
SHAREPOINT_DOC = _read_sharepoint_setting('BIAppConfig:SharepointDocument')
class SharePoint:
    """Helper for reading files from one SharePoint document library.

    Every method must be indented inside this class body. If the ``def``
    lines sit at module level instead, they become plain functions and
    ``SharePoint().download_file(...)`` raises
    ``AttributeError: 'SharePoint' object has no attribute 'download_file'``.

    NOTE(review): ``Office365``, ``Site`` and ``Version`` are imported
    elsewhere; the calls look like the SharePlum client API -- confirm.
    """

    def auth(self):
        """Log in with the configured credentials and return the site object.

        The cookie and the site are also stored on ``self.authcookie`` and
        ``self.site``.
        """
        self.authcookie = Office365(
            SHAREPOINT_URL, username=USERNAME, password=PASSWORD
        ).GetCookies()
        self.site = Site(
            SHAREPOINT_SITE, version=Version.v365, authcookie=self.authcookie
        )
        return self.site

    def connect_folder(self, folder_name):
        """Authenticate and return the folder object for ``folder_name``.

        ``folder_name`` is appended directly to ``SHAREPOINT_DOC`` with no
        separator, so one of the two has to supply the ``/`` between them.
        A fresh login is performed on every call.
        """
        self.auth_site = self.auth()
        self.sharepoint_dir = ''.join([SHAREPOINT_DOC, folder_name])
        self.folder = self.auth_site.Folder(self.sharepoint_dir)
        return self.folder

    def download_file(self, file_name, folder_name):
        """Return whatever the folder's ``get_file`` yields for ``file_name``."""
        self._folder = self.connect_folder(folder_name)
        return self._folder.get_file(file_name)

    def _get_files_list(self, folder_name):
        """Return the ``files`` listing of the folder ``folder_name``."""
        self._folder = self.connect_folder(folder_name)
        return self._folder.files

    def download_files(self, folder_name):
        """Return the file listing of ``folder_name``.

        Despite the name, nothing is downloaded here; callers iterate the
        listing and fetch each entry themselves.
        """
        self._files_list = self._get_files_list(folder_name)
        return self._files_list
# Argument 1: SharePoint folder name. SharePoint.connect_folder appends it
# directly to SHAREPOINT_DOC to form the folder path.
folder_name = 'Evotec/Global Account Plan/Archive'
# Argument 2: name of a single SharePoint file to copy. The entry point
# treats the literal string 'None' as "not set".
file_name = 'Gold Sheet Evotec.xlsx'
# Argument 3: SharePoint file name pattern. Also compared against the literal
# string 'None' in the entry point.
# NOTE(review): presumably meant as the regex for get_files_by_pattern -- confirm.
file_name_pattern = 'Gold Sheet'
# Build the path of the 'config' entry. The JSON-file based configuration
# below is commented out, so nothing is actually read here.
##ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
# os.path.abspath('') resolves to the current working directory, so ROOT_DIR
# is the parent directory of the working directory.
ROOT_DIR = os.path.dirname(os.path.abspath(''))
config_path = PurePath(ROOT_DIR, 'config')
# Disabled: loading the 'azure_storage' section from the JSON config file.
##with open(config_path) as config_file:
## config = json.load(config_file)
## config = config['azure_storage']
### Storage Account & Path
def _read_storage_setting(key):
    """Return the value stored under ``key`` with label ``BIApp``."""
    setting = app_config_client.get_configuration_setting(key=key, label='BIApp')
    return setting.value


AZURE_ACCOUNT_NAME = _read_storage_setting('BIAppConfig:storageAccountName')
AZURE_ACCESS_KEY = _read_storage_setting('BIAppConfig:storageAccountAccessKey')
CONTAINER_NAME = _read_storage_setting('BIAppConfig:salesforceContainerName')

# Connection string assembled from the account name and access key above.
AZURE_CONN_STR = (
    'DefaultEndpointsProtocol=https;'
    f'AccountName={AZURE_ACCOUNT_NAME};'
    f'AccountKey={AZURE_ACCESS_KEY};'
    'EndpointSuffix=core.windows.net'
)

# Key Vault client built from a client-secret credential.
KV_CREDENTIAL = ClientSecretCredential(
    tenant_id=TENANT_ID,
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
)
KV_SECRETCLIENT = SecretClient(vault_url=KEYVAULT_URI, credential=KV_CREDENTIAL)
# functions used for azure storage
def upload_file_to_blob(file_obj, file_name):
    """Upload ``file_obj`` to the configured container as blob ``file_name``.

    The blob client is created from ``AZURE_CONN_STR`` for the container
    ``CONTAINER_NAME``; the access key is additionally passed as credential.
    """
    blob_client = BlobClient.from_connection_string(
        AZURE_CONN_STR,
        CONTAINER_NAME,
        file_name,
        credential=AZURE_ACCESS_KEY,
    )
    blob_client.upload_blob(file_obj)
def get_file(file_n, folder):
    """Fetch ``file_n`` from the SharePoint ``folder`` and upload it as a blob.

    The blob is stored under the same name as the SharePoint file.
    """
    sharepoint = SharePoint()
    content = sharepoint.download_file(file_n, folder)
    upload_file_to_blob(content, file_n)
def get_files(folder):
    """Copy every file listed in the SharePoint ``folder`` to blob storage."""
    sharepoint = SharePoint()
    listing = sharepoint.download_files(folder)
    # Each listing entry exposes the file name under the 'Name' key.
    for entry in listing:
        get_file(entry['Name'], folder)
def get_files_by_pattern(pattern, folder):
    """Copy the files in ``folder`` whose name matches the regex ``pattern``.

    ``pattern`` is applied with ``re.search``, so it may match anywhere in
    the file name.
    """
    listing = SharePoint().download_files(folder)
    # Lazy filter: each name is tested right before it is copied.
    matching_names = (
        entry['Name'] for entry in listing if re.search(pattern, entry['Name'])
    )
    for name in matching_names:
        get_file(name, folder)
if __name__ == '__main__':
    # Entry point: choose the copy mode from the script arguments.
    # The literal string 'None' (not the None object) marks an argument as
    # unset.
    if file_name != 'None':
        # A specific file was requested: copy just that one.
        get_file(file_name, folder_name)
    elif file_name_pattern != 'None':
        # Copy every file whose name matches the pattern. The helper is
        # get_files_by_pattern and takes the pattern, not a file entry.
        get_files_by_pattern(file_name_pattern, folder_name)
    else:
        # Neither a file nor a pattern was given: copy the whole folder.
        get_files(folder_name)
I am getting the following error:
AttributeError: 'SharePoint' object has no attribute 'download_file'
AttributeError Traceback (most recent call last)
<command-3758021352223721> in <cell line: 62>()
if __name__ == '__main__':
if file_name != 'None':
---> get_file(file_name, folder_name)
elif file_name_pattern != 'None':
get_file_name_pattern(file, folder_name)
<command-3758021352223721> in get_file(file_n, folder)
def get_file(file_n, folder):
---> file_obj = SharePoint().download_file(file_n, folder)
upload_file_to_blob(file_obj, file_n)
Please advise.
Regards
Rohit
09-15-2022 10:00 PM
Hi, SharePoint is not a supported data source, as per https://docs.databricks.com/data/data-sources/index.html.
09-28-2022 12:10 AM
Hi @Rohit Kulkarni
Hope all is well! Just wanted to check in: were you able to resolve your issue? If so, would you be happy to share the solution or mark an answer as best? Otherwise, please let us know if you need more help.
We'd love to hear from you.
Thanks!
Join our fast-growing data practitioner and expert community of 80K+ members, ready to discover, help and collaborate together while making meaningful connections.
Click here to register and join today!
Engage in exciting technical discussions, join a group with your peers and meet our Featured Members.