<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Reading multiple parquet files from same _delta_log under a path in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/reading-multiple-parquet-files-from-same-delta-log-under-a-path/m-p/31030#M22561</link>
    <description>&lt;P&gt;Not sure about the best but it helped me to think it differently which I was not aware of.&lt;/P&gt;</description>
    <pubDate>Fri, 25 Feb 2022 20:45:24 GMT</pubDate>
    <dc:creator>KKo</dc:creator>
    <dc:date>2022-02-25T20:45:24Z</dc:date>
    <item>
      <title>Reading multiple parquet files from same _delta_log under a path</title>
      <link>https://community.databricks.com/t5/data-engineering/reading-multiple-parquet-files-from-same-delta-log-under-a-path/m-p/31025#M22556</link>
      <description>&lt;P&gt;I have a path where there is _delta_log and 3 snappy.parquet files. I am trying to read all those .parquet using spark.read.format('delta').load(path) but I am getting data from only one same file all the time. Can't I read from all these files? If so how to achieve this?&lt;/P&gt;</description>
      <pubDate>Mon, 24 Jan 2022 22:00:16 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/reading-multiple-parquet-files-from-same-delta-log-under-a-path/m-p/31025#M22556</guid>
      <dc:creator>KKo</dc:creator>
      <dc:date>2022-01-24T22:00:16Z</dc:date>
    </item>
    <item>
      <title>Re: Reading multiple parquet files from same _delta_log under a path</title>
      <link>https://community.databricks.com/t5/data-engineering/reading-multiple-parquet-files-from-same-delta-log-under-a-path/m-p/31027#M22558</link>
      <description>&lt;P&gt;the fact there are multiple parquet files does not mean all those files are 'active'.  Delta lake can do time travel, meaning you can roll back a delta table to a previous state.  To be able to do that, it needs the old data.&lt;/P&gt;&lt;P&gt;That is why old data is not removed, and you can see multiple parquet files which are not used in the most recent version of delta_lake.&lt;/P&gt;&lt;P&gt;you can remove them with the VACUUM command:&lt;/P&gt;&lt;P&gt;&lt;A href="https://docs.microsoft.com/en-us/azure/databricks/spark/latest/spark-sql/language-manual/delta-vacuum" alt="https://docs.microsoft.com/en-us/azure/databricks/spark/latest/spark-sql/language-manual/delta-vacuum" target="_blank"&gt;https://docs.microsoft.com/en-us/azure/databricks/spark/latest/spark-sql/language-manual/delta-vacuum&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 25 Jan 2022 12:29:02 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/reading-multiple-parquet-files-from-same-delta-log-under-a-path/m-p/31027#M22558</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2022-01-25T12:29:02Z</dc:date>
    </item>
    <item>
      <title>Re: Reading multiple parquet files from same _delta_log under a path</title>
      <link>https://community.databricks.com/t5/data-engineering/reading-multiple-parquet-files-from-same-delta-log-under-a-path/m-p/31028#M22559</link>
      <description>&lt;P&gt;@Werner Stinckens​&amp;nbsp;Thanks for the reply and explanation, that was helpful to understand the delta feature.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 24 Feb 2022 13:42:16 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/reading-multiple-parquet-files-from-same-delta-log-under-a-path/m-p/31028#M22559</guid>
      <dc:creator>KKo</dc:creator>
      <dc:date>2022-02-24T13:42:16Z</dc:date>
    </item>
    <item>
      <title>Re: Reading multiple parquet files from same _delta_log under a path</title>
      <link>https://community.databricks.com/t5/data-engineering/reading-multiple-parquet-files-from-same-delta-log-under-a-path/m-p/31030#M22561</link>
      <description>&lt;P&gt;Not sure about the best but it helped me to think it differently which I was not aware of.&lt;/P&gt;</description>
      <pubDate>Fri, 25 Feb 2022 20:45:24 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/reading-multiple-parquet-files-from-same-delta-log-under-a-path/m-p/31030#M22561</guid>
      <dc:creator>KKo</dc:creator>
      <dc:date>2022-02-25T20:45:24Z</dc:date>
    </item>
  </channel>
</rss>

