<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic optimizeWrite takes too long in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/optimizewrite-takes-too-long/m-p/50435#M28809</link>
    <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;We have a Spark job that writes data to a Delta table for the last 90 date partitions. We have enabled&amp;nbsp;&lt;SPAN&gt;spark.databricks.delta.autoCompact.enabled and&amp;nbsp;delta.autoOptimize.optimizeWrite. The job takes 50 minutes to complete. Of that, the logic takes 12 minutes and optimizeWrite takes 37 minutes. Is there any way to reduce the total job time, given that the output per partition is a 64 MB file?&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;We are using DBT 12.&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;</description>
    <pubDate>Sat, 04 Nov 2023 11:35:05 GMT</pubDate>
    <dc:creator>svrdragon</dc:creator>
    <dc:date>2023-11-04T11:35:05Z</dc:date>
    <item>
      <title>optimizeWrite takes too long</title>
      <link>https://community.databricks.com/t5/data-engineering/optimizewrite-takes-too-long/m-p/50435#M28809</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;We have a Spark job that writes data to a Delta table for the last 90 date partitions. We have enabled&amp;nbsp;&lt;SPAN&gt;spark.databricks.delta.autoCompact.enabled and&amp;nbsp;delta.autoOptimize.optimizeWrite. The job takes 50 minutes to complete. Of that, the logic takes 12 minutes and optimizeWrite takes 37 minutes. Is there any way to reduce the total job time, given that the output per partition is a 64 MB file?&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;We are using DBT 12.&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 04 Nov 2023 11:35:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/optimizewrite-takes-too-long/m-p/50435#M28809</guid>
      <dc:creator>svrdragon</dc:creator>
      <dc:date>2023-11-04T11:35:05Z</dc:date>
    </item>
  </channel>
</rss>

