<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Email Extraction in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/email-extraction/m-p/105662#M42231</link>
    <description>&lt;P class=""&gt;If you face issues with IMAP, consider using &lt;STRONG&gt;Microsoft Graph API&lt;/STRONG&gt; for email access. It provides robust support for Outlook without handling IMAP details and enhances security with OAuth2 tokens.&lt;/P&gt;&lt;P class=""&gt;Following is a sample script, but I haven't tested it:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;pip install msal

import os
import requests
from msal import ConfidentialClientApplication

# Azure AD App Credentials
CLIENT_ID = os.getenv("CLIENT_ID")  # Client ID from Azure App Registration
CLIENT_SECRET = os.getenv("CLIENT_SECRET")  # Client Secret from Azure App
TENANT_ID = os.getenv("TENANT_ID")  # Tenant ID
EMAIL_ADDRESS = "your-email@company.com"

# Microsoft Graph API URL
GRAPH_API_ENDPOINT = "https://graph.microsoft.com/v1.0"

# Authentication
def get_access_token():
    """Request a Microsoft Graph access token through MSAL's client flow.

    Reads CLIENT_ID, CLIENT_SECRET and TENANT_ID from module scope and asks
    for the Graph ".default" scope.

    Returns:
        The value stored under "access_token" in the MSAL response.

    Raises:
        Exception: if the MSAL response has no "access_token" key; the whole
            response is included in the message to aid debugging.
    """
    authority_url = f"https://login.microsoftonline.com/{TENANT_ID}"
    graph_scopes = ["https://graph.microsoft.com/.default"]

    client = ConfidentialClientApplication(
        CLIENT_ID,
        authority=authority_url,
        client_credential=CLIENT_SECRET,
    )
    result = client.acquire_token_for_client(scopes=graph_scopes)

    # Fail fast when no token came back; otherwise hand the token to the caller.
    if "access_token" not in result:
        raise Exception(f"Failed to get access token: {result}")
    return result["access_token"]

# Get emails with attachments
def get_emails_with_attachments():
    """List the mailbox's messages that have attachments and download them.

    Obtains a token, queries the messages of EMAIL_ADDRESS filtered on
    hasAttachments, prints each subject and passes each message id to
    download_attachments together with the authorization headers.

    Raises:
        requests.HTTPError: if the listing request returns an error status.
    """
    token = get_access_token()
    auth_headers = {"Authorization": f"Bearer {token}"}

    # No explicit page size ($top) is sent and only the "value" array of this
    # single response is read, so any further result pages are not followed.
    messages_url = f"{GRAPH_API_ENDPOINT}/users/{EMAIL_ADDRESS}/messages?$filter=hasAttachments eq true"
    listing = requests.get(messages_url, headers=auth_headers)
    listing.raise_for_status()

    for message in listing.json()["value"]:
        print(f"Email Subject: {message['subject']}")
        download_attachments(message["id"], auth_headers)

# Download attachments
def download_attachments(email_id, headers):
    """Save every attachment of one message under /tmp and copy it to DBFS.

    Args:
        email_id: Graph id of the message whose attachments are listed.
        headers: Request headers carrying the bearer token.

    Raises:
        requests.HTTPError: if the attachments request returns an error status.
    """
    # Imported here so the function carries its own dependency.
    import base64

    # The token is obtained for the application itself (no signed-in user),
    # so the mailbox is addressed explicitly, exactly as the message listing
    # does, instead of going through /me.
    response = requests.get(
        f"{GRAPH_API_ENDPOINT}/users/{EMAIL_ADDRESS}/messages/{email_id}/attachments",
        headers=headers,
    )
    response.raise_for_status()
    attachments = response.json()["value"]

    for attachment in attachments:
        # Entries without "contentBytes" carry no inline file data; skip them.
        if "contentBytes" in attachment:
            # The name comes from the email itself, so keep only its final
            # path component to prevent writing outside /tmp.
            filename = os.path.basename(attachment["name"])
            file_data = attachment["contentBytes"]
            file_path = f"/tmp/{filename}"

            # Save locally first. contentBytes is base64 text, so it must be
            # decoded to recover the original file bytes.
            with open(file_path, "wb") as f:
                f.write(base64.b64decode(file_data))
            print(f"Saved attachment: {filename}")

            # Upload to DBFS. dbutils.fs resolves paths against the DBFS root,
            # so the dbfs: scheme is used rather than the /dbfs mount path.
            dbfs_path = f"dbfs:/tmp/{filename}"
            dbutils.fs.cp(f"file:{file_path}", dbfs_path)
            print(f"Uploaded to DBFS: {dbfs_path}")&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Another approach could be to use Logic Apps, if you are in the Azure cloud. Have a look here:&amp;nbsp;&lt;A href="https://bakshiharsh55.medium.com/save-e-mail-attachment-to-blob-storage-utilizing-azure-logic-app-9db2b926fa23" target="_blank" rel="noopener"&gt;https://bakshiharsh55.medium.com/save-e-mail-attachment-to-blob-storage-utilizing-azure-logic-app-9db2b926fa23&lt;/A&gt;&lt;/P&gt;</description>
    <pubDate>Wed, 15 Jan 2025 06:21:24 GMT</pubDate>
    <dc:creator>Stefan-Koch</dc:creator>
    <dc:date>2025-01-15T06:21:24Z</dc:date>
    <item>
      <title>Email Extraction</title>
      <link>https://community.databricks.com/t5/data-engineering/email-extraction/m-p/105656#M42228</link>
      <description>&lt;P&gt;Hi , Hope you are doing well. I was trying to extract a specific email attachment from the outlook, and inject into the dbfs loaction, but something went wrong. Could you please help. I am hereby giving the code whcih I used.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; imaplib&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; email&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; os&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; email.header &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; decode_header&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; email.utils &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; parseaddr&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; base64&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;IMAP_SERVER &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt; &lt;SPAN&gt;"outlook.office365.com"&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;IMAP_PORT &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt; &lt;SPAN&gt;993&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;EMAIL_ACCOUNT &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt; &lt;SPAN&gt;"------------"&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;PASSWORD &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;try&lt;/SPAN&gt;&lt;SPAN&gt;:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; mail &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; imaplib.&lt;/SPAN&gt;&lt;SPAN&gt;IMAP4_SSL&lt;/SPAN&gt;&lt;SPAN&gt;(IMAP_SERVER, IMAP_PORT)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; mail.&lt;/SPAN&gt;&lt;SPAN&gt;login&lt;/SPAN&gt;&lt;SPAN&gt;(EMAIL_ACCOUNT, PASSWORD)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; 
mail.&lt;/SPAN&gt;&lt;SPAN&gt;select&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"inbox"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; status, messages &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; mail.&lt;/SPAN&gt;&lt;SPAN&gt;search&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;None&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;'(SUBJECT "API_Files")'&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; email_ids &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; messages[&lt;/SPAN&gt;&lt;SPAN&gt;0&lt;/SPAN&gt;&lt;SPAN&gt;].&lt;/SPAN&gt;&lt;SPAN&gt;split&lt;/SPAN&gt;&lt;SPAN&gt;()&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;for&lt;/SPAN&gt;&lt;SPAN&gt; email_id &lt;/SPAN&gt;&lt;SPAN&gt;in&lt;/SPAN&gt;&lt;SPAN&gt; email_ids:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; status, msg_data &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; mail.&lt;/SPAN&gt;&lt;SPAN&gt;fetch&lt;/SPAN&gt;&lt;SPAN&gt;(email_id, &lt;/SPAN&gt;&lt;SPAN&gt;"(RFC822)"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;for&lt;/SPAN&gt;&lt;SPAN&gt; response_part &lt;/SPAN&gt;&lt;SPAN&gt;in&lt;/SPAN&gt;&lt;SPAN&gt; msg_data:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;if&lt;/SPAN&gt; &lt;SPAN&gt;isinstance&lt;/SPAN&gt;&lt;SPAN&gt;(response_part, &lt;/SPAN&gt;&lt;SPAN&gt;tuple&lt;/SPAN&gt;&lt;SPAN&gt;):&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; msg &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; 
email.&lt;/SPAN&gt;&lt;SPAN&gt;message_from_bytes&lt;/SPAN&gt;&lt;SPAN&gt;(response_part[&lt;/SPAN&gt;&lt;SPAN&gt;1&lt;/SPAN&gt;&lt;SPAN&gt;])&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; subject, encoding &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt; &lt;SPAN&gt;decode_header&lt;/SPAN&gt;&lt;SPAN&gt;(msg[&lt;/SPAN&gt;&lt;SPAN&gt;"Subject"&lt;/SPAN&gt;&lt;SPAN&gt;])[&lt;/SPAN&gt;&lt;SPAN&gt;0&lt;/SPAN&gt;&lt;SPAN&gt;]&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;if&lt;/SPAN&gt; &lt;SPAN&gt;isinstance&lt;/SPAN&gt;&lt;SPAN&gt;(subject, &lt;/SPAN&gt;&lt;SPAN&gt;bytes&lt;/SPAN&gt;&lt;SPAN&gt;):&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; subject &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; subject.&lt;/SPAN&gt;&lt;SPAN&gt;decode&lt;/SPAN&gt;&lt;SPAN&gt;(encoding &lt;/SPAN&gt;&lt;SPAN&gt;if&lt;/SPAN&gt;&lt;SPAN&gt; encoding &lt;/SPAN&gt;&lt;SPAN&gt;else&lt;/SPAN&gt; &lt;SPAN&gt;"utf-8"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; from_ &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; msg.&lt;/SPAN&gt;&lt;SPAN&gt;get&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"From"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; 
&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; from_email &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt; &lt;SPAN&gt;parseaddr&lt;/SPAN&gt;&lt;SPAN&gt;(from_)[&lt;/SPAN&gt;&lt;SPAN&gt;1&lt;/SPAN&gt;&lt;SPAN&gt;]&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;if&lt;/SPAN&gt;&lt;SPAN&gt; msg.&lt;/SPAN&gt;&lt;SPAN&gt;is_multipart&lt;/SPAN&gt;&lt;SPAN&gt;():&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;for&lt;/SPAN&gt;&lt;SPAN&gt; part &lt;/SPAN&gt;&lt;SPAN&gt;in&lt;/SPAN&gt;&lt;SPAN&gt; msg.&lt;/SPAN&gt;&lt;SPAN&gt;walk&lt;/SPAN&gt;&lt;SPAN&gt;():&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; content_type &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; part.&lt;/SPAN&gt;&lt;SPAN&gt;get_content_type&lt;/SPAN&gt;&lt;SPAN&gt;()&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; content_disposition &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt; &lt;SPAN&gt;str&lt;/SPAN&gt;&lt;SPAN&gt;(part.&lt;/SPAN&gt;&lt;SPAN&gt;get&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"Content-Disposition"&lt;/SPAN&gt;&lt;SPAN&gt;))&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 
&amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;if&lt;/SPAN&gt; &lt;SPAN&gt;"attachment"&lt;/SPAN&gt; &lt;SPAN&gt;in&lt;/SPAN&gt;&lt;SPAN&gt; content_disposition:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; filename &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; part.&lt;/SPAN&gt;&lt;SPAN&gt;get_filename&lt;/SPAN&gt;&lt;SPAN&gt;()&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;if&lt;/SPAN&gt;&lt;SPAN&gt; filename:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; filepath &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt; &lt;SPAN&gt;f&lt;/SPAN&gt;&lt;SPAN&gt;"dbfs:/tmp&lt;/SPAN&gt;&lt;SPAN&gt;{&lt;/SPAN&gt;&lt;SPAN&gt;filename&lt;/SPAN&gt;&lt;SPAN&gt;}&lt;/SPAN&gt;&lt;SPAN&gt;"&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;with&lt;/SPAN&gt; &lt;SPAN&gt;open&lt;/SPAN&gt;&lt;SPAN&gt;(filepath, &lt;/SPAN&gt;&lt;SPAN&gt;"wb"&lt;/SPAN&gt;&lt;SPAN&gt;) &lt;/SPAN&gt;&lt;SPAN&gt;as&lt;/SPAN&gt;&lt;SPAN&gt; f:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 
f.&lt;/SPAN&gt;&lt;SPAN&gt;write&lt;/SPAN&gt;&lt;SPAN&gt;(part.&lt;/SPAN&gt;&lt;SPAN&gt;get_payload&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;decode&lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;))&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;print&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;f&lt;/SPAN&gt;&lt;SPAN&gt;"Attachment saved to &lt;/SPAN&gt;&lt;SPAN&gt;{&lt;/SPAN&gt;&lt;SPAN&gt;filepath&lt;/SPAN&gt;&lt;SPAN&gt;}&lt;/SPAN&gt;&lt;SPAN&gt;"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;else&lt;/SPAN&gt;&lt;SPAN&gt;:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;pass&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; mail.&lt;/SPAN&gt;&lt;SPAN&gt;logout&lt;/SPAN&gt;&lt;SPAN&gt;()&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;except&lt;/SPAN&gt;&lt;SPAN&gt; imaplib.IMAP4.error &lt;/SPAN&gt;&lt;SPAN&gt;as&lt;/SPAN&gt;&lt;SPAN&gt; e:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;print&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;f&lt;/SPAN&gt;&lt;SPAN&gt;"IMAP error: &lt;/SPAN&gt;&lt;SPAN&gt;{&lt;/SPAN&gt;&lt;SPAN&gt;e&lt;/SPAN&gt;&lt;SPAN&gt;}&lt;/SPAN&gt;&lt;SPAN&gt;"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Wed, 15 Jan 2025 04:19:22 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/email-extraction/m-p/105656#M42228</guid>
      <dc:creator>Sangeetha112</dc:creator>
      <dc:date>2025-01-15T04:19:22Z</dc:date>
    </item>
    <item>
      <title>Re: Email Extraction</title>
      <link>https://community.databricks.com/t5/data-engineering/email-extraction/m-p/105662#M42231</link>
      <description>&lt;P class=""&gt;If you face issues with IMAP, consider using &lt;STRONG&gt;Microsoft Graph API&lt;/STRONG&gt; for email access. It provides robust support for Outlook without handling IMAP details and enhances security with OAuth2 tokens.&lt;/P&gt;&lt;P class=""&gt;Following is a sample script, but I haven't tested it:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;pip install msal

import os
import requests
from msal import ConfidentialClientApplication

# Azure AD App Credentials
CLIENT_ID = os.getenv("CLIENT_ID")  # Client ID from Azure App Registration
CLIENT_SECRET = os.getenv("CLIENT_SECRET")  # Client Secret from Azure App
TENANT_ID = os.getenv("TENANT_ID")  # Tenant ID
EMAIL_ADDRESS = "your-email@company.com"

# Microsoft Graph API URL
GRAPH_API_ENDPOINT = "https://graph.microsoft.com/v1.0"

# Authentication
def get_access_token():
    """Request a Microsoft Graph access token through MSAL's client flow.

    Reads CLIENT_ID, CLIENT_SECRET and TENANT_ID from module scope and asks
    for the Graph ".default" scope.

    Returns:
        The value stored under "access_token" in the MSAL response.

    Raises:
        Exception: if the MSAL response has no "access_token" key; the whole
            response is included in the message to aid debugging.
    """
    authority_url = f"https://login.microsoftonline.com/{TENANT_ID}"
    graph_scopes = ["https://graph.microsoft.com/.default"]

    client = ConfidentialClientApplication(
        CLIENT_ID,
        authority=authority_url,
        client_credential=CLIENT_SECRET,
    )
    result = client.acquire_token_for_client(scopes=graph_scopes)

    # Fail fast when no token came back; otherwise hand the token to the caller.
    if "access_token" not in result:
        raise Exception(f"Failed to get access token: {result}")
    return result["access_token"]

# Get emails with attachments
def get_emails_with_attachments():
    """List the mailbox's messages that have attachments and download them.

    Obtains a token, queries the messages of EMAIL_ADDRESS filtered on
    hasAttachments, prints each subject and passes each message id to
    download_attachments together with the authorization headers.

    Raises:
        requests.HTTPError: if the listing request returns an error status.
    """
    token = get_access_token()
    auth_headers = {"Authorization": f"Bearer {token}"}

    # No explicit page size ($top) is sent and only the "value" array of this
    # single response is read, so any further result pages are not followed.
    messages_url = f"{GRAPH_API_ENDPOINT}/users/{EMAIL_ADDRESS}/messages?$filter=hasAttachments eq true"
    listing = requests.get(messages_url, headers=auth_headers)
    listing.raise_for_status()

    for message in listing.json()["value"]:
        print(f"Email Subject: {message['subject']}")
        download_attachments(message["id"], auth_headers)

# Download attachments
def download_attachments(email_id, headers):
    """Save every attachment of one message under /tmp and copy it to DBFS.

    Args:
        email_id: Graph id of the message whose attachments are listed.
        headers: Request headers carrying the bearer token.

    Raises:
        requests.HTTPError: if the attachments request returns an error status.
    """
    # Imported here so the function carries its own dependency.
    import base64

    # The token is obtained for the application itself (no signed-in user),
    # so the mailbox is addressed explicitly, exactly as the message listing
    # does, instead of going through /me.
    response = requests.get(
        f"{GRAPH_API_ENDPOINT}/users/{EMAIL_ADDRESS}/messages/{email_id}/attachments",
        headers=headers,
    )
    response.raise_for_status()
    attachments = response.json()["value"]

    for attachment in attachments:
        # Entries without "contentBytes" carry no inline file data; skip them.
        if "contentBytes" in attachment:
            # The name comes from the email itself, so keep only its final
            # path component to prevent writing outside /tmp.
            filename = os.path.basename(attachment["name"])
            file_data = attachment["contentBytes"]
            file_path = f"/tmp/{filename}"

            # Save locally first. contentBytes is base64 text, so it must be
            # decoded to recover the original file bytes.
            with open(file_path, "wb") as f:
                f.write(base64.b64decode(file_data))
            print(f"Saved attachment: {filename}")

            # Upload to DBFS. dbutils.fs resolves paths against the DBFS root,
            # so the dbfs: scheme is used rather than the /dbfs mount path.
            dbfs_path = f"dbfs:/tmp/{filename}"
            dbutils.fs.cp(f"file:{file_path}", dbfs_path)
            print(f"Uploaded to DBFS: {dbfs_path}")&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Another approach could be to use Logic Apps, if you are in the Azure cloud. Have a look here:&amp;nbsp;&lt;A href="https://bakshiharsh55.medium.com/save-e-mail-attachment-to-blob-storage-utilizing-azure-logic-app-9db2b926fa23" target="_blank" rel="noopener"&gt;https://bakshiharsh55.medium.com/save-e-mail-attachment-to-blob-storage-utilizing-azure-logic-app-9db2b926fa23&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 15 Jan 2025 06:21:24 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/email-extraction/m-p/105662#M42231</guid>
      <dc:creator>Stefan-Koch</dc:creator>
      <dc:date>2025-01-15T06:21:24Z</dc:date>
    </item>
  </channel>
</rss>

