cancel
Showing results for 
Search instead for 
Did you mean: 
Data Engineering
Join discussions on data engineering best practices, architectures, and optimization strategies within the Databricks Community. Exchange insights and solutions with fellow data engineers.
cancel
Showing results for 
Search instead for 
Did you mean: 

Get file from SharePoint to copy into Azure blob storage

RohitKulkarni
Contributor

Hello Team,

I am trying to copy the xlx files from sharepoint and move to the Azure blob storage

USERNAME = app_config_client.get_configuration_setting(key='BIAppConfig:SharepointUsername',label='BIApp').value

PASSWORD = app_config_client.get_configuration_setting(key='BIAppConfig:SharepointPassword',label='BIApp').value

SHAREPOINT_URL = app_config_client.get_configuration_setting(key='BIAppConfig:SharepointUrl',label='BIApp').value

SHAREPOINT_SITE =app_config_client.get_configuration_setting(key='BIAppConfig:SharepointSite',label='BIApp').value

SHAREPOINT_DOC = app_config_client.get_configuration_setting(key='BIAppConfig:SharepointDocument',label='BIApp').value

class SharePoint:

   def auth(self):

      self.authcookie = Office365(SHAREPOINT_URL, username = USERNAME, password=PASSWORD).GetCookies()

      self.site = Site(SHAREPOINT_SITE, version=Version.v365, authcookie=self.authcookie)

      return self.site

   

def connect_folder(self, folder_name):

    self.auth_site = self.auth()

    self.sharepoint_dir = ''.join([SHAREPOINT_DOC, folder_name])

    self.folder = self.auth_site.Folder(self.sharepoint_dir)

    return self.folder

   

def download_file(self, file_name, folder_name):

    self._folder = self.connect_folder(folder_name)

    return self._folder.get_file(file_name)

   

def _get_files_list(self, folder_name):

    self._folder = self.connect_folder(folder_name)

    return self._folder.files

def download_files(self, folder_name):

    self._files_list = self._get_files_list(folder_name)

    return self._files_list

# 1 args = SharePoint folder name.

folder_name = 'Evotec/Global Account Plan/Archive'

# 2 args = SharePoint file name.

file_name = 'Gold Sheet Evotec.xlsx'

# 3 args = SharePoint file name pattern

file_name_pattern = 'Gold Sheet'

# read json file

##ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

ROOT_DIR = os.path.dirname(os.path.abspath(''))

config_path = PurePath(ROOT_DIR, 'config')

##with open(config_path) as config_file:

##  config = json.load(config_file)

##  config = config['azure_storage']

### Storage Account & Path

AZURE_ACCOUNT_NAME=app_config_client.get_configuration_setting(key='BIAppConfig:storageAccountName',label='BIApp').value

AZURE_ACCESS_KEY=app_config_client.get_configuration_setting(key='BIAppConfig:storageAccountAccessKey',label='BIApp').value

CONTAINER_NAME=app_config_client.get_configuration_setting(key='BIAppConfig:salesforceContainerName',label='BIApp').value

AZURE_CONN_STR=f'DefaultEndpointsProtocol=https;AccountName={AZURE_ACCOUNT_NAME};AccountKey={AZURE_ACCESS_KEY};EndpointSuffix=core.windows.net'

KV_CREDENTIAL = ClientSecretCredential(tenant_id=TENANT_ID,client_id=CLIENT_ID,client_secret=CLIENT_SECRET)

KV_SECRETCLIENT = SecretClient(vault_url=KEYVAULT_URI, credential=KV_CREDENTIAL)

# functions used for azure storage

def upload_file_to_blob(file_obj, file_name):

  blob = BlobClient.from_connection_string(

    conn_str=AZURE_CONN_STR,

    container_name=CONTAINER_NAME,

    blob_name=file_name,

    credential=AZURE_ACCESS_KEY

  )

  blob.upload_blob(file_obj)

def get_file(file_n, folder):

  file_obj = SharePoint().download_file(file_n, folder)

  upload_file_to_blob(file_obj, file_n)

   

def get_files(folder):

  files_list = SharePoint().download_files(folder)

  for file in files_list:

    get_file(file['Name'], folder)

     

def get_files_by_pattern(pattern, folder):

  files_list = SharePoint().download_files(folder)

  for file in files_list:

    if re.search(pattern, file['Name']):

      get_file(file['Name'], folder)

       

if __name__ == '__main__':

  if file_name != 'None':

    get_file(file_name, folder_name)

  elif file_name_pattern != 'None':

    get_file_name_pattern(file, folder_name)

  else:

    get_files(folder_name)

I am getting error :

AttributeError: 'SharePoint' object has no attribute 'download_file'

AttributeError Traceback (most recent call last)

<command-3758021352223721> in <cell line: 62>()

if __name__ == '__main__':

if file_name != 'None':

---> get_file(file_name, folder_name)

elif file_name_pattern != 'None':

get_file_name_pattern(file, folder_name)

<command-3758021352223721> in get_file(file_n, folder)

def get_file(file_n, folder):

---> file_obj = SharePoint().download_file(file_n, folder)

upload_file_to_blob(file_obj, file_n)

Please advise Aman.

Regards

Rohit

0 REPLIES 0
Join 100K+ Data Experts: Register Now & Grow with Us!

Excited to expand your horizons with us? Click here to Register and begin your journey to success!

Already a member? Login and join your local regional user group! If there isn’t one near you, fill out this form and we’ll create one for you to join!