<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic com.amazonaws.services.s3.model.AmazonS3Exception: The bucket is in this region: *** when using S3 Select in Machine Learning</title>
    <link>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13832#M728</link>
    <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I have a cluster running in us-east-1 region.&lt;/P&gt;&lt;P&gt;I have a Spark job loading data in a DataFrame using s3select format on a bucket in eu-west-1 region.&lt;/P&gt;&lt;P&gt;Access and Secret keys are encoded in URI s3a://$AccessKey:$SecretKey@bucket/path/to/dir&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Job fails with following stacktrace &lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;Caused by: com.amazonaws.services.s3.model.AmazonS3Exception: The bucket is in this region: eu-west-1. Please use this region to retry the request (Service: Amazon S3; Status Code: 301; Error Code: PermanentRedirect; Request ID: 1TTFZ54B0757A901; S3 Extended Request ID: TMqeVLFYG/b1mLVoLlSRqCMYuNbYj+cSSKneAde2/Lis7WSBvSuq98KsTcdc6SGvZHwET8GOnRs=; Proxy: null), S3 Extended Request ID: TMqeVLFYG/b1mLVoLlSRqCMYuNbYj+cSSKneAde2/Lis7WSBvSuq98KsTcdc6SGvZHwET8GOnRs=
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1862)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleServiceErrorResponse(AmazonHttpClient.java:1415)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1154)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:811)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:779)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:753)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:713)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:695)
	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:559)
	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:539)
	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5453)
	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5400)
	at com.amazonaws.services.s3.AmazonS3Client.selectObjectContent(AmazonS3Client.java:3221)
	at com.databricks.io.s3select.S3SelectDataSource$.readFileFromS3(S3SelectDataSource.scala:238)
	at com.databricks.io.s3select.S3SelectDataSource$.readFile(S3SelectDataSource.scala:284)
	at com.databricks.io.s3select.S3SelectFileFormat.$anonfun$buildReader$2(S3SelectFileFormat.scala:88)
	at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:157)
	at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:144)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:525)
	... 37 more&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;I tried to set &lt;I&gt;spark.hadoop.fs.s3a.bucket.&amp;lt;my-bucket&amp;gt;.endpoint&lt;/I&gt; to &lt;I&gt;s3.eu-west-1.amazonaws.com&lt;/I&gt; in cluster config without success.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Any advice ?&lt;/P&gt;</description>
    <pubDate>Tue, 03 Jan 2023 18:02:14 GMT</pubDate>
    <dc:creator>lbourgeois</dc:creator>
    <dc:date>2023-01-03T18:02:14Z</dc:date>
    <item>
      <title>com.amazonaws.services.s3.model.AmazonS3Exception: The bucket is in this region: *** when using S3 Select</title>
      <link>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13832#M728</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I have a cluster running in us-east-1 region.&lt;/P&gt;&lt;P&gt;I have a Spark job loading data in a DataFrame using s3select format on a bucket in eu-west-1 region.&lt;/P&gt;&lt;P&gt;Access and Secret keys are encoded in URI s3a://$AccessKey:$SecretKey@bucket/path/to/dir&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Job fails with following stacktrace &lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;Caused by: com.amazonaws.services.s3.model.AmazonS3Exception: The bucket is in this region: eu-west-1. Please use this region to retry the request (Service: Amazon S3; Status Code: 301; Error Code: PermanentRedirect; Request ID: 1TTFZ54B0757A901; S3 Extended Request ID: TMqeVLFYG/b1mLVoLlSRqCMYuNbYj+cSSKneAde2/Lis7WSBvSuq98KsTcdc6SGvZHwET8GOnRs=; Proxy: null), S3 Extended Request ID: TMqeVLFYG/b1mLVoLlSRqCMYuNbYj+cSSKneAde2/Lis7WSBvSuq98KsTcdc6SGvZHwET8GOnRs=
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1862)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleServiceErrorResponse(AmazonHttpClient.java:1415)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1154)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:811)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:779)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:753)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:713)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:695)
	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:559)
	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:539)
	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5453)
	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5400)
	at com.amazonaws.services.s3.AmazonS3Client.selectObjectContent(AmazonS3Client.java:3221)
	at com.databricks.io.s3select.S3SelectDataSource$.readFileFromS3(S3SelectDataSource.scala:238)
	at com.databricks.io.s3select.S3SelectDataSource$.readFile(S3SelectDataSource.scala:284)
	at com.databricks.io.s3select.S3SelectFileFormat.$anonfun$buildReader$2(S3SelectFileFormat.scala:88)
	at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:157)
	at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:144)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:525)
	... 37 more&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;I tried to set &lt;I&gt;spark.hadoop.fs.s3a.bucket.&amp;lt;my-bucket&amp;gt;.endpoint&lt;/I&gt; to &lt;I&gt;s3.eu-west-1.amazonaws.com&lt;/I&gt; in cluster config without success.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Any advice ?&lt;/P&gt;</description>
      <pubDate>Tue, 03 Jan 2023 18:02:14 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13832#M728</guid>
      <dc:creator>lbourgeois</dc:creator>
      <dc:date>2023-01-03T18:02:14Z</dc:date>
    </item>
    <item>
      <title>Re: com.amazonaws.services.s3.model.AmazonS3Exception: The bucket is in this region: *** when using S3 Select</title>
      <link>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13833#M729</link>
      <description>&lt;P&gt;Maybe these resources will help:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Access S3 buckets with URIs and AWS keys &lt;A href="https://docs.databricks.com/external-data/amazon-s3.html#access-s3-buckets-with-uris-and-aws-keys" target="test_blank"&gt;https://docs.databricks.com/external-data/amazon-s3.html#access-s3-buckets-with-uris-and-aws-keys&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;If you are using the unity catalog and S3 buckets are inside the same account you can register them as external locations &lt;A href="https://docs.databricks.com/data-governance/unity-catalog/manage-external-locations-and-credentials.html#manage-external-locations" target="test_blank"&gt;https://docs.databricks.com/data-governance/unity-catalog/manage-external-locations-and-credentials.html#manage-external-locations&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 03 Jan 2023 21:35:59 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13833#M729</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2023-01-03T21:35:59Z</dc:date>
    </item>
    <item>
      <title>Re: com.amazonaws.services.s3.model.AmazonS3Exception: The bucket is in this region: *** when using S3 Select</title>
      <link>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13834#M730</link>
      <description>&lt;P&gt;Thanks @Hubert Dudek​&amp;nbsp;for having a look.&lt;/P&gt;&lt;P&gt;I don't use unity catalog, I actually use &lt;I&gt;3.Encode keys in URI &lt;/I&gt;option for S3 Auth as described in &lt;A href="https://docs.databricks.com/external-data/amazon-s3-select.html#s3-authentication" target="test_blank"&gt;https://docs.databricks.com/external-data/amazon-s3-select.html#s3-authentication&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Strange thing is that if I change format to &lt;I&gt;csv &lt;/I&gt;in DataFrameReader I don't face this issue (even without specifying any region or endpoint). What I wonder is :&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;is there any limitation around region when using s3 select connector ?&lt;/LI&gt;&lt;LI&gt;does anyone know how to specify a different region to avoid this exception ?&lt;/LI&gt;&lt;/UL&gt;</description>
      <pubDate>Wed, 04 Jan 2023 11:13:26 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13834#M730</guid>
      <dc:creator>lbourgeois</dc:creator>
      <dc:date>2023-01-04T11:13:26Z</dc:date>
    </item>
    <item>
      <title>Re: com.amazonaws.services.s3.model.AmazonS3Exception: The bucket is in this region: *** when using S3 Select</title>
      <link>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13835#M731</link>
      <description>&lt;P&gt;Maybe share your code as I haven't noticed s3select format and even don't know what is it &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 04 Jan 2023 11:23:41 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13835#M731</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2023-01-04T11:23:41Z</dc:date>
    </item>
    <item>
      <title>Re: com.amazonaws.services.s3.model.AmazonS3Exception: The bucket is in this region: *** when using S3 Select</title>
      <link>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13836#M732</link>
      <description>&lt;P&gt;Sure, I reproduced the issue  on a notebook. Here is the code snippet to create a Dataset with s3select and csv formats :&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;val s3selectDS = spark.read.format("s3select").schema(mySchema)
.load("s3://"+accessKey+":"+secretKey+"@lbourgeois-rd/s3selectdbrcsv")
val csvDS = spark.read.format("csv").schema(mySchema)
.load("s3://"+accessKey+":"+secretKey+"@lbourgeois-rd/s3selectdbrcsv")&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;As you can see only the format arg is different.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Displaying csvDS works fine &lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/934i7ED9D0DB4A4BAA70/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;Displaying s3selectDS raises the issue&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/930i5A80007AC8BF5A02/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 04 Jan 2023 14:14:39 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13836#M732</guid>
      <dc:creator>lbourgeois</dc:creator>
      <dc:date>2023-01-04T14:14:39Z</dc:date>
    </item>
    <item>
      <title>Re: com.amazonaws.services.s3.model.AmazonS3Exception: The bucket is in this region: *** when using S3 Select</title>
      <link>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13837#M733</link>
      <description>&lt;P&gt;You can try to set S3 Gateway so that it will be in the VPC network. that feature is for free &lt;A href="https://docs.aws.amazon.com/vpc/latest/privatelink/vpc-endpoints-s3.html" target="test_blank"&gt;https://docs.aws.amazon.com/vpc/latest/privatelink/vpc-endpoints-s3.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://www.databricks.com/blog/2022/11/08/optimizing-aws-s3-access-databricks.html" target="test_blank"&gt;https://www.databricks.com/blog/2022/11/08/optimizing-aws-s3-access-databricks.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 05 Jan 2023 10:01:23 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13837#M733</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2023-01-05T10:01:23Z</dc:date>
    </item>
    <item>
      <title>Re: com.amazonaws.services.s3.model.AmazonS3Exception: The bucket is in this region: *** when using S3 Select</title>
      <link>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13839#M735</link>
      <description>&lt;P&gt;Hi @Hubert Dudek​&amp;nbsp;and @47kappal​&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Sorry for the delay. As suggested I'm trying to setup a gateway endpoint for s3 following &lt;A href="https://docs.aws.amazon.com/vpc/latest/privatelink/vpc-endpoints-s3.html" target="test_blank"&gt;https://docs.aws.amazon.com/vpc/latest/privatelink/vpc-endpoints-s3.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I am a bit confused by &lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;A gateway endpoint is available only in the Region where you created it. Be sure to create your gateway endpoint in the same Region as your S3 buckets.&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;In my case the vpc used by the cluster (and in which the gateway will be created) is us-east-1 while s3 bucket is in eu-west-1 so above statement can't be respected (bucket and gateway won't be in same region)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I am also confused by the fact that it works with format(csv) but not with format(s3select). I wonder about limitations with s3 select connector.&lt;/P&gt;</description>
      <pubDate>Mon, 23 Jan 2023 18:25:51 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13839#M735</guid>
      <dc:creator>lbourgeois</dc:creator>
      <dc:date>2023-01-23T18:25:51Z</dc:date>
    </item>
    <item>
      <title>Re: com.amazonaws.services.s3.model.AmazonS3Exception: The bucket is in this region: *** when using S3 Select</title>
      <link>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13840#M736</link>
      <description>&lt;P&gt;It seems that you need to create vpc in another region and peer it with your main region &lt;A href="https://aws.amazon.com/premiumsupport/knowledge-center/vpc-endpoints-cross-region-aws-services/" target="test_blank"&gt;https://aws.amazon.com/premiumsupport/knowledge-center/vpc-endpoints-cross-region-aws-services/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;s3select is a completely different connector, optimized to take only part of the file from s3 bucket so it is different library&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 23 Jan 2023 19:10:36 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13840#M736</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2023-01-23T19:10:36Z</dc:date>
    </item>
    <item>
      <title>Re: com.amazonaws.services.s3.model.AmazonS3Exception: The bucket is in this region: *** when using S3 Select</title>
      <link>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13841#M737</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I tried your suggestion by setting up the peering connection between the 2 VPC but issue remains the same.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;The error message &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;I&gt;The bucket is in this region: .... please use this region to retry the request&lt;/I&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;makes me think that the root cause is not at network level but at S3 Select Spark connector level which does not use correct regional s3 endpoint.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;The connector does not seem to have such property : &lt;A href="https://docs.databricks.com/external-data/amazon-s3-select.html" alt="https://docs.databricks.com/external-data/amazon-s3-select.html" target="_blank"&gt;https://docs.databricks.com/external-data/amazon-s3-select.html&lt;/A&gt; doc&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Then I tried to set following properties at Spark level as usually suggested in such situation without any effect :&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;spark.conf.set("fs.s3a.endpoint","&lt;A href="https://s3.eu-west-1.amazonaws.com" alt="https://s3.eu-west-1.amazonaws.com" target="_blank"&gt;s3.eu-west-1.amazonaws.com&lt;/A&gt;")&lt;/P&gt;&lt;P&gt;spark.conf.set("fs.s3n.endpoint","&lt;A href="https://s3.eu-west-1.amazonaws.com" alt="https://s3.eu-west-1.amazonaws.com" target="_blank"&gt;s3.eu-west-1.amazonaws.com&lt;/A&gt;")&lt;/P&gt;&lt;P&gt;spark.conf.set("fs.s3.endpoint","&lt;A href="https://s3.eu-west-1.amazonaws.com" alt="https://s3.eu-west-1.amazonaws.com" target="_blank"&gt;s3.eu-west-1.amazonaws.com&lt;/A&gt;")&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;It seems that the S3 Select connector does not forward this endpoint setting to the underlying AWS S3 SDK&lt;/P&gt;</description>
      <pubDate>Thu, 26 Jan 2023 08:31:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/com-amazonaws-services-s3-model-amazons3exception-the-bucket-is/m-p/13841#M737</guid>
      <dc:creator>lbourgeois</dc:creator>
      <dc:date>2023-01-26T08:31:48Z</dc:date>
    </item>
  </channel>
</rss>

