<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Error reading in Parquet file in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/error-reading-in-parquet-file/m-p/11947#M6848</link>
    <description>&lt;P&gt;This error may be related to credential issue.&lt;/P&gt;&lt;P&gt;You can try this code &lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;spark.conf.set("fs.azure.account.auth.type.&amp;lt;storage-account-name&amp;gt;.dfs.core.windows.net", "&amp;lt;your-access-key&amp;gt;")
spark.read.parquet("abfss://............/..._2023-01-14T08:01:29.8549884Z.parquet")&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;To hide &amp;lt;your-access-key&amp;gt; you can create Secret scopes follow the instructions link below:&lt;/P&gt;&lt;P&gt;&lt;A href="https://learn.microsoft.com/en-us/azure/databricks/security/secrets/secret-scopes" alt="https://learn.microsoft.com/en-us/azure/databricks/security/secrets/secret-scopes" target="_blank"&gt;Link to create Secret scopes.&lt;/A&gt;&lt;/P&gt;</description>
    <pubDate>Sat, 14 Jan 2023 18:59:55 GMT</pubDate>
    <dc:creator>anpa</dc:creator>
    <dc:date>2023-01-14T18:59:55Z</dc:date>
    <item>
      <title>Error reading in Parquet file</title>
      <link>https://community.databricks.com/t5/data-engineering/error-reading-in-parquet-file/m-p/11946#M6847</link>
      <description>&lt;P&gt;I am trying to read a .parquet file from a ADLS gen2 location in azure databricks . But facing the below error:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;spark.read.parquet("abfss://............/..._2023-01-14T08:01:29.8549884Z.parquet")&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 3) (10.139.64.6 executor 0): org.apache.spark.SparkException: Exception thrown in awaitResult: &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I searched in google ( as per suggestion in some posts tried to set spark.driver.maxResultSize  to 20g , some blogs says to put inferSchema option ) but getting the same error again and again . The file size I am trying to read is 12kb . &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I tried with below runtime versions in my databricks cluster&lt;/P&gt;&lt;P&gt;11.3 LTS (includes Apache Spark 3.3.0, Scala 2.12)&lt;/P&gt;&lt;P&gt;11.1 (includes Apache Spark 3.3.0, Scala 2.12)&lt;/P&gt;&lt;P&gt;10.4 LTS (includes Apache Spark 3.2.1, Scala 2.12)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Can anyone please advise how to overcome this issue ?&lt;/P&gt;</description>
      <pubDate>Sat, 14 Jan 2023 12:09:25 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-reading-in-parquet-file/m-p/11946#M6847</guid>
      <dc:creator>BL</dc:creator>
      <dc:date>2023-01-14T12:09:25Z</dc:date>
    </item>
    <item>
      <title>Re: Error reading in Parquet file</title>
      <link>https://community.databricks.com/t5/data-engineering/error-reading-in-parquet-file/m-p/11947#M6848</link>
      <description>&lt;P&gt;This error may be related to credential issue.&lt;/P&gt;&lt;P&gt;You can try this code &lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;spark.conf.set("fs.azure.account.auth.type.&amp;lt;storage-account-name&amp;gt;.dfs.core.windows.net", "&amp;lt;your-access-key&amp;gt;")
spark.read.parquet("abfss://............/..._2023-01-14T08:01:29.8549884Z.parquet")&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;To hide &amp;lt;your-access-key&amp;gt; you can create Secret scopes follow the instructions link below:&lt;/P&gt;&lt;P&gt;&lt;A href="https://learn.microsoft.com/en-us/azure/databricks/security/secrets/secret-scopes" alt="https://learn.microsoft.com/en-us/azure/databricks/security/secrets/secret-scopes" target="_blank"&gt;Link to create Secret scopes.&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 14 Jan 2023 18:59:55 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-reading-in-parquet-file/m-p/11947#M6848</guid>
      <dc:creator>anpa</dc:creator>
      <dc:date>2023-01-14T18:59:55Z</dc:date>
    </item>
    <item>
      <title>Re: Error reading in Parquet file</title>
      <link>https://community.databricks.com/t5/data-engineering/error-reading-in-parquet-file/m-p/11948#M6849</link>
      <description>&lt;P&gt;Thanks for your answer .&lt;/P&gt;&lt;P&gt;But I was using same kind of code with access key &lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 14 Jan 2023 21:44:21 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-reading-in-parquet-file/m-p/11948#M6849</guid>
      <dc:creator>BL</dc:creator>
      <dc:date>2023-01-14T21:44:21Z</dc:date>
    </item>
    <item>
      <title>Re: Error reading in Parquet file</title>
      <link>https://community.databricks.com/t5/data-engineering/error-reading-in-parquet-file/m-p/11949#M6850</link>
      <description>&lt;P&gt;I tried again , but the same error &lt;/P&gt;&lt;P&gt;spark.conf.set("fs.azure.account.key.&amp;lt;ContainerName&amp;gt;.dfs.core.windows.net",ACCESS_KEY)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;spark.read.parquet("abfss://............/..._2023-01-14T08:01:29.8549884Z.parquet")&lt;/P&gt;</description>
      <pubDate>Sat, 14 Jan 2023 21:54:36 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-reading-in-parquet-file/m-p/11949#M6850</guid>
      <dc:creator>BL</dc:creator>
      <dc:date>2023-01-14T21:54:36Z</dc:date>
    </item>
    <item>
      <title>Re: Error reading in Parquet file</title>
      <link>https://community.databricks.com/t5/data-engineering/error-reading-in-parquet-file/m-p/11950#M6851</link>
      <description>&lt;P&gt;Can you access the executor logs? When you cluster is up and running, you can access the executor's logs. For example, the error shows:&lt;/P&gt;&lt;P&gt;org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 3) (10.139.64.6 executor 0): org.apache.spark.SparkException: Exception thrown in awaitResult:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Go to the Executor 0 and check why it failed&lt;/P&gt;</description>
      <pubDate>Mon, 30 Jan 2023 22:51:18 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-reading-in-parquet-file/m-p/11950#M6851</guid>
      <dc:creator>jose_gonzalez</dc:creator>
      <dc:date>2023-01-30T22:51:18Z</dc:date>
    </item>
  </channel>
</rss>

