<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic benchmark tpc-ds from external parquet hive structure in S3 in Get Started Discussions</title>
    <link>https://community.databricks.com/t5/get-started-discussions/benchmark-tpc-ds-from-external-parquet-hive-structure-in-s/m-p/52740#M1811</link>
    <description>&lt;P&gt;Hi I am just getting started in databricks would appreciate some help here.&lt;/P&gt;&lt;P&gt;I have 10TB TPCDS in S3 in a hive partition structure.&lt;BR /&gt;My goal is to benchmark a Databricks cluster on this data.&lt;/P&gt;&lt;P&gt;after setting all IAM credentials according to this&amp;nbsp;&lt;SPAN&gt;&lt;SPAN class=""&gt;&lt;A title="https://docs.databricks.com/en/data-governance/unity-catalog/manage-external-locations-and-credentials.html" href="https://docs.databricks.com/en/data-governance/unity-catalog/manage-external-locations-and-credentials.html" target="_blank" rel="noreferrer noopener"&gt;https://docs.databricks.com/en/data-governance/unity-catalog/manage-external-locations-and-credentials.html&lt;/A&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;I set bucket as external location in catalog I am trying now to load data from S3 but I am getting this error&lt;/P&gt;&lt;DIV class=""&gt;&amp;gt; Error loading files.&lt;/DIV&gt;&lt;DIV class=""&gt;&amp;gt; parent external location for path `s3://326989250725-datasets/` does not exist.&lt;/DIV&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV class=""&gt;what is the issue here? In general is this the correct approach? I would really like to use a hive like command to create external tables in s3 then execute on them in spark.&lt;/DIV&gt;</description>
    <pubDate>Fri, 17 Nov 2023 19:49:13 GMT</pubDate>
    <dc:creator>hillel1</dc:creator>
    <dc:date>2023-11-17T19:49:13Z</dc:date>
    <item>
      <title>benchmark tpc-ds from external parquet hive structure in S3</title>
      <link>https://community.databricks.com/t5/get-started-discussions/benchmark-tpc-ds-from-external-parquet-hive-structure-in-s/m-p/52740#M1811</link>
      <description>&lt;P&gt;Hi I am just getting started in databricks would appreciate some help here.&lt;/P&gt;&lt;P&gt;I have 10TB TPCDS in S3 in a hive partition structure.&lt;BR /&gt;My goal is to benchmark a Databricks cluster on this data.&lt;/P&gt;&lt;P&gt;after setting all IAM credentials according to this&amp;nbsp;&lt;SPAN&gt;&lt;SPAN class=""&gt;&lt;A title="https://docs.databricks.com/en/data-governance/unity-catalog/manage-external-locations-and-credentials.html" href="https://docs.databricks.com/en/data-governance/unity-catalog/manage-external-locations-and-credentials.html" target="_blank" rel="noreferrer noopener"&gt;https://docs.databricks.com/en/data-governance/unity-catalog/manage-external-locations-and-credentials.html&lt;/A&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;I set bucket as external location in catalog I am trying now to load data from S3 but I am getting this error&lt;/P&gt;&lt;DIV class=""&gt;&amp;gt; Error loading files.&lt;/DIV&gt;&lt;DIV class=""&gt;&amp;gt; parent external location for path `s3://326989250725-datasets/` does not exist.&lt;/DIV&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV class=""&gt;what is the issue here? In general is this the correct approach? I would really like to use a hive like command to create external tables in s3 then execute on them in spark.&lt;/DIV&gt;</description>
      <pubDate>Fri, 17 Nov 2023 19:49:13 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/benchmark-tpc-ds-from-external-parquet-hive-structure-in-s/m-p/52740#M1811</guid>
      <dc:creator>hillel1</dc:creator>
      <dc:date>2023-11-17T19:49:13Z</dc:date>
    </item>
  </channel>
</rss>

