<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic OPTIMIZE in parallel with actual data load in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/optimize-in-parallel-with-actual-data-load/m-p/125831#M47535</link>
    <description>&lt;P&gt;Dear all&lt;/P&gt;&lt;P&gt;If I understand correctly,&amp;nbsp;OPTIMIZE cannot run in parallel with actual data load. We see 'concurrent update' errors in our environment if this happens; due to which we are unable to dedicate a maintenance window for the tables health.&lt;/P&gt;&lt;P&gt;And, I saw a presentation from DAIS 2025 that says liquid clustering can run in parallel with actual data load.&lt;/P&gt;&lt;P&gt;Please correct the understanding here.&lt;/P&gt;&lt;P&gt;Appreciate the mindshare...&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Mon, 21 Jul 2025 08:51:37 GMT</pubDate>
    <dc:creator>noorbasha534</dc:creator>
    <dc:date>2025-07-21T08:51:37Z</dc:date>
    <item>
      <title>OPTIMIZE in parallel with actual data load</title>
      <link>https://community.databricks.com/t5/data-engineering/optimize-in-parallel-with-actual-data-load/m-p/125831#M47535</link>
      <description>&lt;P&gt;Dear all&lt;/P&gt;&lt;P&gt;If I understand correctly,&amp;nbsp;OPTIMIZE cannot run in parallel with actual data load. We see 'concurrent update' errors in our environment if this happens; due to which we are unable to dedicate a maintenance window for the tables health.&lt;/P&gt;&lt;P&gt;And, I saw a presentation from DAIS 2025 that says liquid clustering can run in parallel with actual data load.&lt;/P&gt;&lt;P&gt;Please correct the understanding here.&lt;/P&gt;&lt;P&gt;Appreciate the mindshare...&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 21 Jul 2025 08:51:37 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/optimize-in-parallel-with-actual-data-load/m-p/125831#M47535</guid>
      <dc:creator>noorbasha534</dc:creator>
      <dc:date>2025-07-21T08:51:37Z</dc:date>
    </item>
    <item>
      <title>Re: OPTIMIZE in parallel with actual data load</title>
      <link>https://community.databricks.com/t5/data-engineering/optimize-in-parallel-with-actual-data-load/m-p/125835#M47537</link>
      <description>&lt;P&gt;Liquid clustering reorganizes data incrementally, which will work faster because it optimizes only new data. Compared to Z-order, it uses a different algorithm for data organization (Hilbert curve) that allows incremental clustering.&lt;/P&gt;</description>
      <pubDate>Mon, 21 Jul 2025 09:02:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/optimize-in-parallel-with-actual-data-load/m-p/125835#M47537</guid>
      <dc:creator>MariuszK</dc:creator>
      <dc:date>2025-07-21T09:02:58Z</dc:date>
    </item>
    <item>
      <title>Re: OPTIMIZE in parallel with actual data load</title>
      <link>https://community.databricks.com/t5/data-engineering/optimize-in-parallel-with-actual-data-load/m-p/125845#M47544</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/131065"&gt;@MariuszK&lt;/a&gt;&amp;nbsp; this does not answer my question. Can I run OPTIMIZE in parallel with the data load of a liquid clustered table?&lt;/P&gt;</description>
      <pubDate>Mon, 21 Jul 2025 09:57:55 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/optimize-in-parallel-with-actual-data-load/m-p/125845#M47544</guid>
      <dc:creator>noorbasha534</dc:creator>
      <dc:date>2025-07-21T09:57:55Z</dc:date>
    </item>
    <item>
      <title>Re: OPTIMIZE in parallel with actual data load</title>
      <link>https://community.databricks.com/t5/data-engineering/optimize-in-parallel-with-actual-data-load/m-p/125849#M47546</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/124839"&gt;@noorbasha534&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;Yes,&amp;nbsp;&lt;SPAN&gt;Liquid Clustering optimization can be executed on delta tables automatically or manually, at write time with&amp;nbsp;&lt;/SPAN&gt;&lt;A href="https://books.japila.pl/delta-lake-internals/auto-compaction/" target="_blank" rel="noopener"&gt;Auto Compaction&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;enabled or at any time using&amp;nbsp;&lt;/SPAN&gt;&lt;A href="https://books.japila.pl/delta-lake-internals/commands/optimize/" target="_blank" rel="noopener"&gt;OPTIMIZE&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;command, respectively.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://books.japila.pl/delta-lake-internals/liquid-clustering/" target="_blank" rel="noopener"&gt;Liquid Clustering - The Internals of Delta Lake&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Additionally, it is mentioned in the blog post below. Look for clustering on-write:&lt;BR /&gt;&lt;BR /&gt;&lt;A href="https://www.databricks.com/blog/announcing-general-availability-liquid-clustering" target="_blank"&gt;Announcing General Availability of Liquid Clustering | Databricks Blog&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 21 Jul 2025 10:25:17 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/optimize-in-parallel-with-actual-data-load/m-p/125849#M47546</guid>
      <dc:creator>szymon_dybczak</dc:creator>
      <dc:date>2025-07-21T10:25:17Z</dc:date>
    </item>
    <item>
      <title>Re: OPTIMIZE in parallel with actual data load</title>
      <link>https://community.databricks.com/t5/data-engineering/optimize-in-parallel-with-actual-data-load/m-p/125864#M47553</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/124839"&gt;@noorbasha534&lt;/a&gt;, This is a good question to clarify this topic. According to documentation, yes, but honestly speaking, I haven't had a chance to check it in the described scenario.&lt;/P&gt;</description>
      <pubDate>Mon, 21 Jul 2025 12:39:50 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/optimize-in-parallel-with-actual-data-load/m-p/125864#M47553</guid>
      <dc:creator>MariuszK</dc:creator>
      <dc:date>2025-07-21T12:39:50Z</dc:date>
    </item>
    <item>
      <title>Re: OPTIMIZE in parallel with actual data load</title>
      <link>https://community.databricks.com/t5/data-engineering/optimize-in-parallel-with-actual-data-load/m-p/125903#M47572</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/131065"&gt;@MariuszK&lt;/a&gt;&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/110502"&gt;@szymon_dybczak&lt;/a&gt;&amp;nbsp;thanks both. appreciate your support.&lt;/P&gt;</description>
      <pubDate>Mon, 21 Jul 2025 21:13:33 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/optimize-in-parallel-with-actual-data-load/m-p/125903#M47572</guid>
      <dc:creator>noorbasha534</dc:creator>
      <dc:date>2025-07-21T21:13:33Z</dc:date>
    </item>
  </channel>
</rss>

