import dlt  # `spark` is provided by the DLT pipeline runtime
S3_PATH = 's3://datalake-lab/xxxx/'
S3_SCHEMA = 's3://datalake-lab/xxxx/schemas/'

@dlt.table
def test_raw():
    raw_df = (
        spark.readStream.format("cloudFiles")
        .option("cloudFiles.format", "parquet")
        .option("cloudFiles.schemaLocation", S3_SCHEMA)
        .option("cloudFiles.useNotifications", "true")
        .option("cloudFiles.region", "ap-northeast-1")
        .option("cloudFiles.roleArn", "<Instance Profile ARN>")
        .option(
            "cloudFiles.queueUrl",
            "https://sqs.<region>.amazonaws.com/<account-id>/databricks-auto-ingest-demo",
        )
        .load(S3_PATH)  # <-- path to the source parquet files
        # .selectExpr("*", '_metadata.file_name as input_file_name', '_metadata.file_path as input_file_path')
    )
    return raw_df
When the pipeline runs, file notification polling fails with this SQS error:

com.amazonaws.services.sqs.model.AmazonSQSException: User: anonymous is not authorized to perform: sqs:receivemessage on resource: arn:aws:sqs:<region>:<account-id>:databricks-auto-ingest-demo because no resource-based policy allows the sqs:receivemessage action (Service: AmazonSQS; Status Code: 403;
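
The "User: anonymous" part suggests the request to SQS is not being signed with any credentials at all. For reference, a minimal sketch of how I can check which principal my credentials actually resolve to, and whether the queue has any resource-based policy attached (boto3 assumed, run with the same credentials the cluster should be using; the queue URL is the one from the pipeline config):

import json

import boto3

QUEUE_URL = "https://sqs.<region>.amazonaws.com/<account-id>/databricks-auto-ingest-demo"

# Which role/user do my credentials actually resolve to?
sts = boto3.client("sts")
print(sts.get_caller_identity()["Arn"])

# Does the queue have a resource-based policy at all?
sqs = boto3.client("sqs", region_name="ap-northeast-1")
attrs = sqs.get_queue_attributes(
    QueueUrl=QUEUE_URL,
    AttributeNames=["QueueArn", "Policy"],
)["Attributes"]
print(attrs["QueueArn"])
policy = attrs.get("Policy")  # key is absent when no resource policy is attached
print(json.dumps(json.loads(policy), indent=2) if policy else "no resource policy set")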
What settings on the AWS side (the IAM role's permissions, the SQS queue's resource policy, the instance profile) should I check?
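
If the problem turns out to be that the queue simply has no resource policy, this is the kind of statement I assume is missing (untested sketch; the role ARN is a hypothetical placeholder, and it may be that the grant belongs on the role's own identity policy instead):

import json

import boto3

QUEUE_URL = "https://sqs.<region>.amazonaws.com/<account-id>/databricks-auto-ingest-demo"
ROLE_ARN = "arn:aws:iam::<account-id>:role/<pipeline-role>"  # hypothetical placeholder

# Allow the pipeline's role to consume notifications from the queue.
policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": {"AWS": ROLE_ARN},
            "Action": [
                "sqs:ReceiveMessage",
                "sqs:DeleteMessage",
                "sqs:GetQueueAttributes",
            ],
            "Resource": "arn:aws:sqs:<region>:<account-id>:databricks-auto-ingest-demo",
        }
    ],
}

sqs = boto3.client("sqs", region_name="ap-northeast-1")
sqs.set_queue_attributes(QueueUrl=QUEUE_URL, Attributes={"Policy": json.dumps(policy)})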