<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How to reduce storage space consumed by delta with many updates in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/how-to-reduce-storage-space-consumed-by-delta-with-many-updates/m-p/47857#M28212</link>
    <description>&lt;P&gt;Did you already solve this problem?&lt;/P&gt;</description>
    <pubDate>Tue, 03 Oct 2023 12:38:34 GMT</pubDate>
    <dc:creator>Jb11</dc:creator>
    <dc:date>2023-10-03T12:38:34Z</dc:date>
    <item>
      <title>How to reduce storage space consumed by delta with many updates</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-reduce-storage-space-consumed-by-delta-with-many-updates/m-p/14216#M8743</link>
      <description>&lt;P&gt;I have 1 delta table that I continuously append events into, and a 2nd delta table that I continuously merge into (streamed from the 1st table) that has unique IDs where properties are updated from the events (an ID represents a unique thing that gets many events). The actual data size of the 2nd table is ≈ 400MB; however, due to delta versions it consumes ≈ 40GB. I have added vacuum every hour to the streaming process to keep it even this low. Any suggestions on how I can reduce this storage consumption further? I do not require the versioning. Ideally I could have some way to disable this while retaining the ability to MERGE.&lt;/P&gt;</description>
      <pubDate>Tue, 28 Sep 2021 16:58:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-reduce-storage-space-consumed-by-delta-with-many-updates/m-p/14216#M8743</guid>
      <dc:creator>Greg</dc:creator>
      <dc:date>2021-09-28T16:58:48Z</dc:date>
    </item>
    <item>
      <title>Re: How to reduce storage space consumed by delta with many updates</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-reduce-storage-space-consumed-by-delta-with-many-updates/m-p/47857#M28212</link>
      <description>&lt;P&gt;Did you already solve this problem?&lt;/P&gt;</description>
      <pubDate>Tue, 03 Oct 2023 12:38:34 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-reduce-storage-space-consumed-by-delta-with-many-updates/m-p/47857#M28212</guid>
      <dc:creator>Jb11</dc:creator>
      <dc:date>2023-10-03T12:38:34Z</dc:date>
    </item>
  </channel>
</rss>

