<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: AutoLoader File notification mode Configuration with AWS in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/autoloader-file-notification-mode-configuration-with-aws/m-p/67683#M33417</link>
    <description>&lt;P&gt;Was this resolved? I run into the same issue&lt;/P&gt;</description>
    <pubDate>Tue, 30 Apr 2024 13:53:35 GMT</pubDate>
    <dc:creator>djhs</dc:creator>
    <dc:date>2024-04-30T13:53:35Z</dc:date>
    <item>
      <title>AutoLoader File notification mode Configuration with AWS</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-file-notification-mode-configuration-with-aws/m-p/56902#M30685</link>
      <description>&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;from pyspark.sql import functions as F
from pyspark.sql import types as T
from pyspark.sql import DataFrame, Column
from pyspark.sql.types import Row
import dlt

S3_PATH = 's3://datalake-lab/XXXXX/'
S3_SCHEMA = 's3://datalake-lab/XXXXX/schemas/'

raw_df = (
    spark.readStream.format("cloudFiles")
    .option("cloudFiles.format", "parquet")
    .option("cloudFiles.schemaLocation", f"{S3_SCHEMA}")
    .option("cloudFiles.useNotifications", "true")
    .option("cloudFiles.region", "ap-northeast-1")
    .option(
        "cloudFiles.queueUrl",
        "https://sqs.ap-northeast-1.amazonaws.com/372383439276/databricks-auto-ingest-test",
    )
    .load(f"{S3_PATH}")  
)


display(raw_df) &lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;[All Policy]&lt;BR /&gt;I am applying the following policy to the IAM that I am using as an instance profile for databricks.&lt;BR /&gt;&lt;BR /&gt;-&amp;nbsp; Instance Profile's Policies&lt;/P&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;# Policy 1
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "s3:*"
            ],
            "Resource": [
                "arn:aws:s3:::datalake-lab"
            ]
        },
        {
            "Effect": "Allow",
            "Action": [
                "s3:PutObject",
                "s3:GetObject",
                "s3:DeleteObject",
                "s3:PutObjectAcl"
            ],
            "Resource": [
                "arn:aws:s3:::datalake-lab/*"
            ]
        }
    ]
}

# Policy 2
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "DatabricksAutoLoaderSetup",
            "Effect": "Allow",
            "Action": [
                "s3:GetBucketNotification",
                "s3:PutBucketNotification",
                "sns:ListSubscriptionsByTopic",
                "sns:GetTopicAttributes",
                "sns:SetTopicAttributes",
                "sns:CreateTopic",
                "sns:TagResource",
                "sns:Publish",
                "sns:Subscribe",
                "sqs:CreateQueue",
                "sqs:DeleteMessage",
                "sqs:ReceiveMessage",
                "sqs:SendMessage",
                "sqs:GetQueueUrl",
                "sqs:GetQueueAttributes",
                "sqs:SetQueueAttributes",
                "sqs:TagQueue",
                "sqs:ChangeMessageVisibility"
            ],
            "Resource": [
                "arn:aws:s3:::datalake-lab",
                "arn:aws:sqs:&amp;lt;region&amp;gt;:&amp;lt;user&amp;gt;:databricks-auto-ingest-test"
            ]
        },
        {
            "Sid": "DatabricksAutoLoaderList",
            "Effect": "Allow",
            "Action": [
                "sqs:ListQueues",
                "sqs:ListQueueTags",
                "sns:ListTopics"
            ],
            "Resource": "*"
        },
        {
            "Sid": "DatabricksAutoLoaderTeardown",
            "Effect": "Allow",
            "Action": [
                "sns:Unsubscribe",
                "sns:DeleteTopic",
                "sqs:DeleteQueue"
            ],
            "Resource": [
                "arn:aws:sqs:&amp;lt;region&amp;gt;:&amp;lt;user&amp;gt;:databricks-auto-ingest-test"
            ]
        }
    ]
}&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV class=""&gt;&amp;nbsp;- SQS Policy&lt;/DIV&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;/DIV&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;{
  "Version": "2012-10-17",
  "Id": "PolicyID1234567890",
  "Statement": [
    {
      "Sid": "AllowS3BucketNotifications",
      "Effect": "Allow",
      "Principal": {
        "Service": "s3.amazonaws.com"
      },
      "Action": "sqs:SendMessage",
      "Resource": "arn:aws:sqs:&amp;lt;region&amp;gt;:&amp;lt;user&amp;gt;:databricks-auto-ingest-test",
      "Condition": {
        "ArnLike": {
          "aws:SourceArn": "arn:aws:s3:::datalake-lab"
        }
      }
    },
    {
      "Sid": "AllowDatabricksRoleAccessToSQSPart1",
      "Effect": "Allow",
      "Principal": {
        "AWS": "&amp;lt;Instance Profile ARN&amp;gt;"
      },
      "Action": [
        "sqs:CreateQueue",
        "sqs:DeleteMessage",
        "sqs:ReceiveMessage",
        "sqs:SendMessage",
        "sqs:GetQueueUrl",
        "sqs:GetQueueAttributes",
        "sqs:SetQueueAttributes"
      ],
      "Resource": "arn:aws:sqs:&amp;lt;region&amp;gt;:&amp;lt;user&amp;gt;:databricks-auto-ingest-test"
    },
    {
      "Sid": "AllowDatabricksRoleAccessToSQSPart2",
      "Effect": "Allow",
      "Principal": {
        "AWS": "&amp;lt;Instance Profile ARN&amp;gt;"
      },
      "Action": [
        "sqs:TagQueue",
        "sqs:ChangeMessageVisibility"
      ],
      "Resource": "arn:aws:sqs:&amp;lt;region&amp;gt;:&amp;lt;user&amp;gt;:databricks-auto-ingest-test"
    }
  ]
}&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I encountered the following error.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&amp;gt; com.amazonaws.services.sqs.model.AmazonSQSException: User: anonymous is not authorized to perform: sqs:receivemessage on resource: arn:aws:sqs:&amp;lt;region-name&amp;gt;:&amp;lt;user&amp;gt;:databricks-auto-ingest-test because no resource-based policy allows the sqs:receivemessage action (Service: AmazonSQS; Status Code: 403; Error Code: AccessDenied;: null)&lt;/DIV&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV class=""&gt;The documentation I referenced is this :&amp;nbsp;&lt;A href="https://docs.databricks.com/en/ingestion/auto-loader/file-notification-mode.html" target="_blank" rel="noopener"&gt;https://docs.databricks.com/en/ingestion/auto-loader/file-notification-mode.html&lt;/A&gt;&lt;/DIV&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Thu, 11 Jan 2024 02:33:50 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-file-notification-mode-configuration-with-aws/m-p/56902#M30685</guid>
      <dc:creator>rt-slowth</dc:creator>
      <dc:date>2024-01-11T02:33:50Z</dc:date>
    </item>
    <item>
      <title>Re: AutoLoader File notification mode Configuration with AWS</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-file-notification-mode-configuration-with-aws/m-p/56963#M30695</link>
      <description>&lt;P&gt;Hi, it looks like this is a&amp;nbsp;&lt;SPAN&gt;resource-based policy permission error. Could you please check if all the actions/permissions have been added properly in the&amp;nbsp;resource-based policy?&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 11 Jan 2024 16:39:56 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-file-notification-mode-configuration-with-aws/m-p/56963#M30695</guid>
      <dc:creator>Debayan</dc:creator>
      <dc:date>2024-01-11T16:39:56Z</dc:date>
    </item>
    <item>
      <title>Re: AutoLoader File notification mode Configuration with AWS</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-file-notification-mode-configuration-with-aws/m-p/57293#M30752</link>
      <description>&lt;P&gt;Hi,&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/26078"&gt;@Debayan&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;How should I check it?&lt;/P&gt;</description>
      <pubDate>Mon, 15 Jan 2024 05:34:56 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-file-notification-mode-configuration-with-aws/m-p/57293#M30752</guid>
      <dc:creator>rt-slowth</dc:creator>
      <dc:date>2024-01-15T05:34:56Z</dc:date>
    </item>
    <item>
      <title>Re: AutoLoader File notification mode Configuration with AWS</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-file-notification-mode-configuration-with-aws/m-p/67683#M33417</link>
      <description>&lt;P&gt;Was this resolved? I run into the same issue&lt;/P&gt;</description>
      <pubDate>Tue, 30 Apr 2024 13:53:35 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-file-notification-mode-configuration-with-aws/m-p/67683#M33417</guid>
      <dc:creator>djhs</dc:creator>
      <dc:date>2024-04-30T13:53:35Z</dc:date>
    </item>
    <item>
      <title>Re: AutoLoader File notification mode Configuration with AWS</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-file-notification-mode-configuration-with-aws/m-p/67866#M33469</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/93775"&gt;@djhs&lt;/a&gt;&amp;nbsp;&lt;BR /&gt;Yes, I solved it. I was trying to run file notification mode on a shared cluster, and that's what caused the problem.&lt;/P&gt;</description>
      <pubDate>Wed, 01 May 2024 23:26:11 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-file-notification-mode-configuration-with-aws/m-p/67866#M33469</guid>
      <dc:creator>rt-slowth</dc:creator>
      <dc:date>2024-05-01T23:26:11Z</dc:date>
    </item>
    <item>
      <title>Re: AutoLoader File notification mode Configuration with AWS</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-file-notification-mode-configuration-with-aws/m-p/67967#M33496</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/87732"&gt;@rt-slowth&lt;/a&gt;&amp;nbsp;, could you please add a few more details on how you fixed it? I have a similar issue. What did you change to avoid using a shared cluster?&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 02 May 2024 18:06:13 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-file-notification-mode-configuration-with-aws/m-p/67967#M33496</guid>
      <dc:creator>Babu_Krishnan</dc:creator>
      <dc:date>2024-05-02T18:06:13Z</dc:date>
    </item>
  </channel>
</rss>

