cancel
Showing results for 
Search instead for 
Did you mean: 
Data Engineering
Join discussions on data engineering best practices, architectures, and optimization strategies within the Databricks Community. Exchange insights and solutions with fellow data engineers.
cancel
Showing results for 
Search instead for 
Did you mean: 

Get file from SharePoint to copy into Azure blob storage

RohitKulkarni
Contributor II

Hello Team,

I am trying to copy the xlx files from sharepoint and move to the Azure blob storage

USERNAME = app_config_client.get_configuration_setting(key='BIAppConfig:SharepointUsername',label='BIApp').value

PASSWORD = app_config_client.get_configuration_setting(key='BIAppConfig:SharepointPassword',label='BIApp').value

SHAREPOINT_URL = app_config_client.get_configuration_setting(key='BIAppConfig:SharepointUrl',label='BIApp').value

SHAREPOINT_SITE =app_config_client.get_configuration_setting(key='BIAppConfig:SharepointSite',label='BIApp').value

SHAREPOINT_DOC = app_config_client.get_configuration_setting(key='BIAppConfig:SharepointDocument',label='BIApp').value

class SharePoint:

   def auth(self):

      self.authcookie = Office365(SHAREPOINT_URL, username = USERNAME, password=PASSWORD).GetCookies()

      self.site = Site(SHAREPOINT_SITE, version=Version.v365, authcookie=self.authcookie)

      return self.site

   

def connect_folder(self, folder_name):

    self.auth_site = self.auth()

    self.sharepoint_dir = ''.join([SHAREPOINT_DOC, folder_name])

    self.folder = self.auth_site.Folder(self.sharepoint_dir)

    return self.folder

   

def download_file(self, file_name, folder_name):

    self._folder = self.connect_folder(folder_name)

    return self._folder.get_file(file_name)

   

def _get_files_list(self, folder_name):

    self._folder = self.connect_folder(folder_name)

    return self._folder.files

def download_files(self, folder_name):

    self._files_list = self._get_files_list(folder_name)

    return self._files_list

# 1 args = SharePoint folder name.

folder_name = 'Evotec/Global Account Plan/Archive'

# 2 args = SharePoint file name.

file_name = 'Gold Sheet Evotec.xlsx'

# 3 args = SharePoint file name pattern

file_name_pattern = 'Gold Sheet'

# read json file

##ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

ROOT_DIR = os.path.dirname(os.path.abspath(''))

config_path = PurePath(ROOT_DIR, 'config')

##with open(config_path) as config_file:

##  config = json.load(config_file)

##  config = config['azure_storage']

### Storage Account & Path

AZURE_ACCOUNT_NAME=app_config_client.get_configuration_setting(key='BIAppConfig:storageAccountName',label='BIApp').value

AZURE_ACCESS_KEY=app_config_client.get_configuration_setting(key='BIAppConfig:storageAccountAccessKey',label='BIApp').value

CONTAINER_NAME=app_config_client.get_configuration_setting(key='BIAppConfig:salesforceContainerName',label='BIApp').value

AZURE_CONN_STR=f'DefaultEndpointsProtocol=https;AccountName={AZURE_ACCOUNT_NAME};AccountKey={AZURE_ACCESS_KEY};EndpointSuffix=core.windows.net'

KV_CREDENTIAL = ClientSecretCredential(tenant_id=TENANT_ID,client_id=CLIENT_ID,client_secret=CLIENT_SECRET)

KV_SECRETCLIENT = SecretClient(vault_url=KEYVAULT_URI, credential=KV_CREDENTIAL)

# functions used for azure storage

def upload_file_to_blob(file_obj, file_name):

  blob = BlobClient.from_connection_string(

    conn_str=AZURE_CONN_STR,

    container_name=CONTAINER_NAME,

    blob_name=file_name,

    credential=AZURE_ACCESS_KEY

  )

  blob.upload_blob(file_obj)

def get_file(file_n, folder):

  file_obj = SharePoint().download_file(file_n, folder)

  upload_file_to_blob(file_obj, file_n)

   

def get_files(folder):

  files_list = SharePoint().download_files(folder)

  for file in files_list:

    get_file(file['Name'], folder)

     

def get_files_by_pattern(pattern, folder):

  files_list = SharePoint().download_files(folder)

  for file in files_list:

    if re.search(pattern, file['Name']):

      get_file(file['Name'], folder)

       

if __name__ == '__main__':

  if file_name != 'None':

    get_file(file_name, folder_name)

  elif file_name_pattern != 'None':

    get_file_name_pattern(file, folder_name)

  else:

    get_files(folder_name)

I am getting error :

AttributeError: 'SharePoint' object has no attribute 'download_file'

AttributeError Traceback (most recent call last)

<command-3758021352223721> in <cell line: 62>()

if __name__ == '__main__':

if file_name != 'None':

---> get_file(file_name, folder_name)

elif file_name_pattern != 'None':

get_file_name_pattern(file, folder_name)

<command-3758021352223721> in get_file(file_n, folder)

def get_file(file_n, folder):

---> file_obj = SharePoint().download_file(file_n, folder)

upload_file_to_blob(file_obj, file_n)

Please advise.

Regards

Rohit

2 REPLIES 2

Debayan
Databricks Employee
Databricks Employee

Hi, Share point is not a supported source as per https://docs.databricks.com/data/data-sources/index.html.

Anonymous
Not applicable

Hi @Rohit Kulkarni​ 

Hope all is well! Just wanted to check in if you were able to resolve your issue and would you be happy to share the solution or mark an answer as best? Else please let us know if you need more help. 

We'd love to hear from you.

Thanks!

Connect with Databricks Users in Your Area

Join a Regional User Group to connect with local Databricks users. Events will be happening in your city, and you won’t want to miss the chance to attend and share knowledge.

If there isn’t a group near you, start one and help create a community that brings people together.

Request a New Group