<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Preferred way to read S3 - dbutils or Boto3 or better solution ? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/preferred-way-to-read-s3-dbutils-or-boto3-or-better-solution/m-p/81448#M36309</link>
    <description>&lt;P&gt;Create an IAM role in AWS with access to S3 and use those credentials to connect from Databricks using the code below:&lt;BR /&gt;&lt;BR /&gt;AWS_SECRET_ACCESS_KEY={{secrets/scope/aws_secret_access_key}}&lt;BR /&gt;AWS_ACCESS_KEY_ID={{secrets/scope/aws_access_key_id}}&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;aws_bucket_name = "my-s3-bucket"&lt;/P&gt;&lt;P&gt;df = spark.read.load(f"s3a://{aws_bucket_name}/s3path/")&lt;BR /&gt;display(df)&lt;/P&gt;</description>
    <pubDate>Thu, 01 Aug 2024 08:18:40 GMT</pubDate>
    <dc:creator>Kannathasan</dc:creator>
    <dc:date>2024-08-01T08:18:40Z</dc:date>
    <item>
      <title>Preferred way to read S3 - dbutils or Boto3 or better solution ?</title>
      <link>https://community.databricks.com/t5/data-engineering/preferred-way-to-read-s3-dbutils-or-boto3-or-better-solution/m-p/81396#M36293</link>
      <description>&lt;P&gt;We have a use case where a table has 15K rows, and one of its columns holds an S3 location. We need to read each row from the table, fetch the S3 location from that column, and read its content from S3. Reading the content from S3 makes the workflow take a lot of time, even on a 96 GB cluster. We tried both options, Boto3 and dbutils.fs.head, and both take around 30 minutes. Is there a better suggestion or solution available?&lt;/P&gt;</description>
      <pubDate>Thu, 01 Aug 2024 01:48:56 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/preferred-way-to-read-s3-dbutils-or-boto3-or-better-solution/m-p/81396#M36293</guid>
      <dc:creator>Neli</dc:creator>
      <dc:date>2024-08-01T01:48:56Z</dc:date>
    </item>
    <item>
      <title>Re: Preferred way to read S3 - dbutils or Boto3 or better solution ?</title>
      <link>https://community.databricks.com/t5/data-engineering/preferred-way-to-read-s3-dbutils-or-boto3-or-better-solution/m-p/81448#M36309</link>
      <description>&lt;P&gt;Create an IAM role in AWS with access to S3 and use those credentials to connect from Databricks using the code below:&lt;BR /&gt;&lt;BR /&gt;AWS_SECRET_ACCESS_KEY={{secrets/scope/aws_secret_access_key}}&lt;BR /&gt;AWS_ACCESS_KEY_ID={{secrets/scope/aws_access_key_id}}&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;aws_bucket_name = "my-s3-bucket"&lt;/P&gt;&lt;P&gt;df = spark.read.load(f"s3a://{aws_bucket_name}/s3path/")&lt;BR /&gt;display(df)&lt;/P&gt;</description>
      <pubDate>Thu, 01 Aug 2024 08:18:40 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/preferred-way-to-read-s3-dbutils-or-boto3-or-better-solution/m-p/81448#M36309</guid>
      <dc:creator>Kannathasan</dc:creator>
      <dc:date>2024-08-01T08:18:40Z</dc:date>
    </item>
  </channel>
</rss>

