cancel
Showing results for 
Search instead for 
Did you mean: 
Data Engineering
cancel
Showing results for 
Search instead for 
Did you mean: 

Get file from SharePoint to copy into Azure blob storage

RohitKulkarni
Contributor

Hello Team,

I am trying to copy the xlx files from sharepoint and move to the Azure blob storage

USERNAME = app_config_client.get_configuration_setting(key='BIAppConfig:SharepointUsername',label='BIApp').value

PASSWORD = app_config_client.get_configuration_setting(key='BIAppConfig:SharepointPassword',label='BIApp').value

SHAREPOINT_URL = app_config_client.get_configuration_setting(key='BIAppConfig:SharepointUrl',label='BIApp').value

SHAREPOINT_SITE =app_config_client.get_configuration_setting(key='BIAppConfig:SharepointSite',label='BIApp').value

SHAREPOINT_DOC = app_config_client.get_configuration_setting(key='BIAppConfig:SharepointDocument',label='BIApp').value

class SharePoint:

   def auth(self):

      self.authcookie = Office365(SHAREPOINT_URL, username = USERNAME, password=PASSWORD).GetCookies()

      self.site = Site(SHAREPOINT_SITE, version=Version.v365, authcookie=self.authcookie)

      return self.site

   

def connect_folder(self, folder_name):

    self.auth_site = self.auth()

    self.sharepoint_dir = ''.join([SHAREPOINT_DOC, folder_name])

    self.folder = self.auth_site.Folder(self.sharepoint_dir)

    return self.folder

   

def download_file(self, file_name, folder_name):

    self._folder = self.connect_folder(folder_name)

    return self._folder.get_file(file_name)

   

def _get_files_list(self, folder_name):

    self._folder = self.connect_folder(folder_name)

    return self._folder.files

def download_files(self, folder_name):

    self._files_list = self._get_files_list(folder_name)

    return self._files_list

# 1 args = SharePoint folder name.

folder_name = 'Evotec/Global Account Plan/Archive'

# 2 args = SharePoint file name.

file_name = 'Gold Sheet Evotec.xlsx'

# 3 args = SharePoint file name pattern

file_name_pattern = 'Gold Sheet'

# read json file

##ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

ROOT_DIR = os.path.dirname(os.path.abspath(''))

config_path = PurePath(ROOT_DIR, 'config')

##with open(config_path) as config_file:

##  config = json.load(config_file)

##  config = config['azure_storage']

### Storage Account & Path

AZURE_ACCOUNT_NAME=app_config_client.get_configuration_setting(key='BIAppConfig:storageAccountName',label='BIApp').value

AZURE_ACCESS_KEY=app_config_client.get_configuration_setting(key='BIAppConfig:storageAccountAccessKey',label='BIApp').value

CONTAINER_NAME=app_config_client.get_configuration_setting(key='BIAppConfig:salesforceContainerName',label='BIApp').value

AZURE_CONN_STR=f'DefaultEndpointsProtocol=https;AccountName={AZURE_ACCOUNT_NAME};AccountKey={AZURE_ACCESS_KEY};EndpointSuffix=core.windows.net'

KV_CREDENTIAL = ClientSecretCredential(tenant_id=TENANT_ID,client_id=CLIENT_ID,client_secret=CLIENT_SECRET)

KV_SECRETCLIENT = SecretClient(vault_url=KEYVAULT_URI, credential=KV_CREDENTIAL)

# functions used for azure storage

def upload_file_to_blob(file_obj, file_name):

  blob = BlobClient.from_connection_string(

    conn_str=AZURE_CONN_STR,

    container_name=CONTAINER_NAME,

    blob_name=file_name,

    credential=AZURE_ACCESS_KEY

  )

  blob.upload_blob(file_obj)

def get_file(file_n, folder):

  file_obj = SharePoint().download_file(file_n, folder)

  upload_file_to_blob(file_obj, file_n)

   

def get_files(folder):

  files_list = SharePoint().download_files(folder)

  for file in files_list:

    get_file(file['Name'], folder)

     

def get_files_by_pattern(pattern, folder):

  files_list = SharePoint().download_files(folder)

  for file in files_list:

    if re.search(pattern, file['Name']):

      get_file(file['Name'], folder)

       

if __name__ == '__main__':

  if file_name != 'None':

    get_file(file_name, folder_name)

  elif file_name_pattern != 'None':

    get_file_name_pattern(file, folder_name)

  else:

    get_files(folder_name)

I am getting error :

AttributeError: 'SharePoint' object has no attribute 'download_file'

AttributeError Traceback (most recent call last)

<command-3758021352223721> in <cell line: 62>()

if __name__ == '__main__':

if file_name != 'None':

---> get_file(file_name, folder_name)

elif file_name_pattern != 'None':

get_file_name_pattern(file, folder_name)

<command-3758021352223721> in get_file(file_n, folder)

def get_file(file_n, folder):

---> file_obj = SharePoint().download_file(file_n, folder)

upload_file_to_blob(file_obj, file_n)

Please advise.

Regards

Rohit

2 REPLIES 2

Debayan
Esteemed Contributor III
Esteemed Contributor III

Hi, Share point is not a supported source as per https://docs.databricks.com/data/data-sources/index.html.

Anonymous
Not applicable

Hi @Rohit Kulkarni​ 

Hope all is well! Just wanted to check in if you were able to resolve your issue and would you be happy to share the solution or mark an answer as best? Else please let us know if you need more help. 

We'd love to hear from you.

Thanks!

Welcome to Databricks Community: Lets learn, network and celebrate together

Join our fast-growing data practitioner and expert community of 80K+ members, ready to discover, help and collaborate together while making meaningful connections. 

Click here to register and join today! 

Engage in exciting technical discussions, join a group with your peers and meet our Featured Members.