<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: What's the best practice on running ANALYZE on Delta Tables for query performance optimization? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/what-s-the-best-practice-on-running-analyze-on-delta-tables-for/m-p/26686#M18709</link>
    <description>&lt;P&gt;Nicely Written&lt;/P&gt;</description>
    <pubDate>Tue, 08 Jun 2021 11:10:01 GMT</pubDate>
    <dc:creator>User16826994223</dc:creator>
    <dc:date>2021-06-08T11:10:01Z</dc:date>
    <item>
      <title>What's the best practice on running ANALYZE on Delta Tables for query performance optimization?</title>
      <link>https://community.databricks.com/t5/data-engineering/what-s-the-best-practice-on-running-analyze-on-delta-tables-for/m-p/26685#M18708</link>
      <description />
      <pubDate>Fri, 28 May 2021 18:50:52 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/what-s-the-best-practice-on-running-analyze-on-delta-tables-for/m-p/26685#M18708</guid>
      <dc:creator>aladda</dc:creator>
      <dc:date>2021-05-28T18:50:52Z</dc:date>
    </item>
    <item>
      <title>Re: What's the best practice on running ANALYZE on Delta Tables for query performance optimization?</title>
      <link>https://community.databricks.com/t5/data-engineering/what-s-the-best-practice-on-running-analyze-on-delta-tables-for/m-p/26686#M18709</link>
      <description>&lt;P&gt;Nicely Written&lt;/P&gt;</description>
      <pubDate>Tue, 08 Jun 2021 11:10:01 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/what-s-the-best-practice-on-running-analyze-on-delta-tables-for/m-p/26686#M18709</guid>
      <dc:creator>User16826994223</dc:creator>
      <dc:date>2021-06-08T11:10:01Z</dc:date>
    </item>
    <item>
      <title>Re: What's the best practice on running ANALYZE on Delta Tables for query performance optimization?</title>
      <link>https://community.databricks.com/t5/data-engineering/what-s-the-best-practice-on-running-analyze-on-delta-tables-for/m-p/26687#M18710</link>
      <description>&lt;UL&gt;&lt;LI&gt;The ANALYZE command specifically captures statistics which are relevant for the Cost-Based Optimizer (CBO) to make better decisions.&lt;/LI&gt;&lt;LI&gt;The 32 columns of statistics that Delta auto-collects are specifically for data skipping. This is separate from the ANALYZE command.&lt;/LI&gt;&lt;LI&gt;The reason the &lt;A href="https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-analyze-table.html" alt="https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-analyze-table.html" target="_blank"&gt;docs&lt;/A&gt; currently say ‘Do not run on Delta tables’ is because it's best to run ANALYZE on Delta tables after completion of any data update/delete operation and when the data has changed by around 10%. This gives the CBO the best and most up-to-date statistics to work with.&lt;/LI&gt;&lt;LI&gt;General best practices:&lt;UL&gt;&lt;LI&gt;ANALYZE whenever the data has changed by about 10%.&lt;/LI&gt;&lt;LI&gt;Make sure when you use ANALYZE, you are specifying the COLUMNS or PARTITIONS you want to collect statistics for.&amp;nbsp;Otherwise, as you have noted, it will re-analyze the entire table.&lt;/LI&gt;&lt;LI&gt;Here is the syntax: &lt;A href="https://docs.databricks.com/spark/latest/spark-sql/language-manual/sql-ref-syntax-aux-analyze-table.html" target="_blank"&gt;https://docs.databricks.com/spark/latest/spark-sql/language-manual/sql-ref-syntax-aux-analyze-table.html&lt;/A&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Sun, 20 Jun 2021 03:21:49 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/what-s-the-best-practice-on-running-analyze-on-delta-tables-for/m-p/26687#M18710</guid>
      <dc:creator>aladda</dc:creator>
      <dc:date>2021-06-20T03:21:49Z</dc:date>
    </item>
    <item>
      <title>Re: What's the best practice on running ANALYZE on Delta Tables for query performance optimization?</title>
      <link>https://community.databricks.com/t5/data-engineering/what-s-the-best-practice-on-running-analyze-on-delta-tables-for/m-p/68674#M33717</link>
      <description>&lt;P&gt;Super write-up; very useful in understanding how the Delta and non-Delta approaches have evolved.&lt;/P&gt;</description>
      <pubDate>Thu, 09 May 2024 17:10:51 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/what-s-the-best-practice-on-running-analyze-on-delta-tables-for/m-p/68674#M33717</guid>
      <dc:creator>jlickt</dc:creator>
      <dc:date>2024-05-09T17:10:51Z</dc:date>
    </item>
  </channel>
</rss>

