cancel
Showing results for 
Search instead for 
Did you mean: 
Data Engineering
cancel
Showing results for 
Search instead for 
Did you mean: 

Get file from SharePoint to copy into Azure blob storage

RohitKulkarni
Contributor

Hello Team,

I am trying to copy the xlx files from sharepoint and move to the Azure blob storage

USERNAME = app_config_client.get_configuration_setting(key='BIAppConfig:SharepointUsername',label='BIApp').value

PASSWORD = app_config_client.get_configuration_setting(key='BIAppConfig:SharepointPassword',label='BIApp').value

SHAREPOINT_URL = app_config_client.get_configuration_setting(key='BIAppConfig:SharepointUrl',label='BIApp').value

SHAREPOINT_SITE =app_config_client.get_configuration_setting(key='BIAppConfig:SharepointSite',label='BIApp').value

SHAREPOINT_DOC = app_config_client.get_configuration_setting(key='BIAppConfig:SharepointDocument',label='BIApp').value

class SharePoint:

   def auth(self):

      self.authcookie = Office365(SHAREPOINT_URL, username = USERNAME, password=PASSWORD).GetCookies()

      self.site = Site(SHAREPOINT_SITE, version=Version.v365, authcookie=self.authcookie)

      return self.site

   

def connect_folder(self, folder_name):

    self.auth_site = self.auth()

    self.sharepoint_dir = ''.join([SHAREPOINT_DOC, folder_name])

    self.folder = self.auth_site.Folder(self.sharepoint_dir)

    return self.folder

   

def download_file(self, file_name, folder_name):

    self._folder = self.connect_folder(folder_name)

    return self._folder.get_file(file_name)

   

def _get_files_list(self, folder_name):

    self._folder = self.connect_folder(folder_name)

    return self._folder.files

def download_files(self, folder_name):

    self._files_list = self._get_files_list(folder_name)

    return self._files_list

# 1 args = SharePoint folder name.

folder_name = 'Evotec/Global Account Plan/Archive'

# 2 args = SharePoint file name.

file_name = 'Gold Sheet Evotec.xlsx'

# 3 args = SharePoint file name pattern

file_name_pattern = 'Gold Sheet'

# read json file

##ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

ROOT_DIR = os.path.dirname(os.path.abspath(''))

config_path = PurePath(ROOT_DIR, 'config')

##with open(config_path) as config_file:

##  config = json.load(config_file)

##  config = config['azure_storage']

### Storage Account & Path

AZURE_ACCOUNT_NAME=app_config_client.get_configuration_setting(key='BIAppConfig:storageAccountName',label='BIApp').value

AZURE_ACCESS_KEY=app_config_client.get_configuration_setting(key='BIAppConfig:storageAccountAccessKey',label='BIApp').value

CONTAINER_NAME=app_config_client.get_configuration_setting(key='BIAppConfig:salesforceContainerName',label='BIApp').value

AZURE_CONN_STR=f'DefaultEndpointsProtocol=https;AccountName={AZURE_ACCOUNT_NAME};AccountKey={AZURE_ACCESS_KEY};EndpointSuffix=core.windows.net'

KV_CREDENTIAL = ClientSecretCredential(tenant_id=TENANT_ID,client_id=CLIENT_ID,client_secret=CLIENT_SECRET)

KV_SECRETCLIENT = SecretClient(vault_url=KEYVAULT_URI, credential=KV_CREDENTIAL)

# functions used for azure storage

def upload_file_to_blob(file_obj, file_name):

  blob = BlobClient.from_connection_string(

    conn_str=AZURE_CONN_STR,

    container_name=CONTAINER_NAME,

    blob_name=file_name,

    credential=AZURE_ACCESS_KEY

  )

  blob.upload_blob(file_obj)

def get_file(file_n, folder):

  file_obj = SharePoint().download_file(file_n, folder)

  upload_file_to_blob(file_obj, file_n)

   

def get_files(folder):

  files_list = SharePoint().download_files(folder)

  for file in files_list:

    get_file(file['Name'], folder)

     

def get_files_by_pattern(pattern, folder):

  files_list = SharePoint().download_files(folder)

  for file in files_list:

    if re.search(pattern, file['Name']):

      get_file(file['Name'], folder)

       

if __name__ == '__main__':

  if file_name != 'None':

    get_file(file_name, folder_name)

  elif file_name_pattern != 'None':

    get_file_name_pattern(file, folder_name)

  else:

    get_files(folder_name)

I am getting error :

AttributeError: 'SharePoint' object has no attribute 'download_file'

AttributeError Traceback (most recent call last)

<command-3758021352223721> in <cell line: 62>()

if __name__ == '__main__':

if file_name != 'None':

---> get_file(file_name, folder_name)

elif file_name_pattern != 'None':

get_file_name_pattern(file, folder_name)

<command-3758021352223721> in get_file(file_n, folder)

def get_file(file_n, folder):

---> file_obj = SharePoint().download_file(file_n, folder)

upload_file_to_blob(file_obj, file_n)

Please advise Aman.

Regards

Rohit

0 REPLIES 0
Welcome to Databricks Community: Lets learn, network and celebrate together

Join our fast-growing data practitioner and expert community of 80K+ members, ready to discover, help and collaborate together while making meaningful connections. 

Click here to register and join today! 

Engage in exciting technical discussions, join a group with your peers and meet our Featured Members.