<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Reading snappy.parquet in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/reading-snappy-parquet/m-p/134720#M50173</link>
    <description>&lt;P&gt;Hello, good day&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/188931"&gt;@Hritik_Moon&lt;/a&gt;&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;That&amp;nbsp;&lt;SPAN&gt;"incompatible format" error is expected when you try to read the folder as parquet, because of the presence of the _delta_log directory created by the Delta format, which follows ACID principles; it surfaces as an&amp;nbsp;AnalysisException.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;The recommended approach is to read it in Delta format only.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Otherwise, the alternative would be to copy those&amp;nbsp;.snappy.parquet files (or file) into a desired folder and read them separately.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Let me share a Medium article I found for this issue:&amp;nbsp;&lt;BR /&gt;&lt;A href="https://medium.com/%40ishanpradhan/how-to-read-a-snappy-parquet-file-in-databricks-696538cd0efc" target="_blank"&gt;https://medium.com/%40ishanpradhan/how-to-read-a-snappy-parquet-file-in-databricks-696538cd0efc&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you.&amp;nbsp;&lt;BR /&gt;I am waiting for solutions from other contributors as well; they can share their approach.&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Mon, 13 Oct 2025 10:17:16 GMT</pubDate>
    <dc:creator>Khaja_Zaffer</dc:creator>
    <dc:date>2025-10-13T10:17:16Z</dc:date>
    <item>
      <title>Reading snappy.parquet</title>
      <link>https://community.databricks.com/t5/data-engineering/reading-snappy-parquet/m-p/134718#M50172</link>
      <description>&lt;P&gt;I stored a DataFrame as Delta in the catalog. It created multiple folders with snappy.parquet files. Is there a way to read these snappy.parquet files?&lt;/P&gt;&lt;P&gt;It reads with pandas, but with Spark it gives the error "incompatible format".&lt;/P&gt;</description>
      <pubDate>Mon, 13 Oct 2025 09:58:08 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/reading-snappy-parquet/m-p/134718#M50172</guid>
      <dc:creator>Hritik_Moon</dc:creator>
      <dc:date>2025-10-13T09:58:08Z</dc:date>
    </item>
    <item>
      <title>Re: Reading snappy.parquet</title>
      <link>https://community.databricks.com/t5/data-engineering/reading-snappy-parquet/m-p/134720#M50173</link>
      <description>&lt;P&gt;Hello, good day&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/188931"&gt;@Hritik_Moon&lt;/a&gt;&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;That&amp;nbsp;&lt;SPAN&gt;"incompatible format" error is expected when you try to read the folder as parquet, because of the presence of the _delta_log directory created by the Delta format, which follows ACID principles; it surfaces as an&amp;nbsp;AnalysisException.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;The recommended approach is to read it in Delta format only.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Otherwise, the alternative would be to copy those&amp;nbsp;.snappy.parquet files (or file) into a desired folder and read them separately.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Let me share a Medium article I found for this issue:&amp;nbsp;&lt;BR /&gt;&lt;A href="https://medium.com/%40ishanpradhan/how-to-read-a-snappy-parquet-file-in-databricks-696538cd0efc" target="_blank"&gt;https://medium.com/%40ishanpradhan/how-to-read-a-snappy-parquet-file-in-databricks-696538cd0efc&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you.&amp;nbsp;&lt;BR /&gt;I am waiting for solutions from other contributors as well; they can share their approach.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 13 Oct 2025 10:17:16 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/reading-snappy-parquet/m-p/134720#M50173</guid>
      <dc:creator>Khaja_Zaffer</dc:creator>
      <dc:date>2025-10-13T10:17:16Z</dc:date>
    </item>
    <item>
      <title>Re: Reading snappy.parquet</title>
      <link>https://community.databricks.com/t5/data-engineering/reading-snappy-parquet/m-p/135187#M50298</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/188931"&gt;@Hritik_Moon&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Try to read the files as Delta.&amp;nbsp;&lt;/P&gt;&lt;P&gt;path/delta_file_name/&lt;BR /&gt;- parquet files&lt;BR /&gt;- _delta_log/&lt;/P&gt;&lt;P&gt;Since you are using Spark, use this: spark.read.format("delta").load("path/delta_file_name").&lt;/P&gt;&lt;P&gt;Delta internally stores the data as Parquet, and the _delta_log directory contains the metadata of transactions. You don't need to touch these files unless you are experimenting. &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;&lt;P&gt;For more info, please go through this:&amp;nbsp;&lt;A href="https://docs.databricks.com/aws/en/delta/tutorial" target="_blank"&gt;https://docs.databricks.com/aws/en/delta/tutorial&lt;/A&gt;.&lt;/P&gt;&lt;P&gt;Hope this solves your issue.&lt;/P&gt;</description>
      <pubDate>Fri, 17 Oct 2025 04:24:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/reading-snappy-parquet/m-p/135187#M50298</guid>
      <dc:creator>Prajapathy_NKR</dc:creator>
      <dc:date>2025-10-17T04:24:05Z</dc:date>
    </item>
  </channel>
</rss>

