<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: how to check table size by partition? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/how-to-check-table-size-by-partition/m-p/4517#M1218</link>
    <description>&lt;P&gt;@jin park:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;You can use the Databricks Delta Lake SHOW TABLE EXTENDED command to get the size of each partition of the table. Here's an example:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;%sql
SHOW TABLE EXTENDED LIKE '&amp;lt;table_name&amp;gt;' 
PARTITION (&amp;lt;partition_column&amp;gt; = '&amp;lt;partition_value&amp;gt;') 
SELECT sizeInBytes&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Replace &amp;lt;table_name&amp;gt; with the name of your Delta table, &amp;lt;partition_column&amp;gt; with the name of your partition column, and &amp;lt;partition_value&amp;gt; with the specific partition value you want to check the size for. If you want to check the size for all partitions, omit the PARTITION clause.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;You can also use the DESCRIBE DETAIL command to get similar information:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;%sql
DESCRIBE DETAIL &amp;lt;table_name&amp;gt;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;This will show you detailed information about the table, including the size of each partition.&lt;/P&gt;</description>
    <pubDate>Sat, 13 May 2023 15:57:50 GMT</pubDate>
    <dc:creator>Anonymous</dc:creator>
    <dc:date>2023-05-13T15:57:50Z</dc:date>
    <item>
      <title>how to check table size by partition?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-check-table-size-by-partition/m-p/4516#M1217</link>
      <description>&lt;P&gt;I want to check the size of the delta table by partition.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;As you can see, only the size of the table can be checked, but not by partition.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 11 May 2023 14:08:57 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-check-table-size-by-partition/m-p/4516#M1217</guid>
      <dc:creator>data_eng_hard</dc:creator>
      <dc:date>2023-05-11T14:08:57Z</dc:date>
    </item>
    <item>
      <title>Re: how to check table size by partition?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-check-table-size-by-partition/m-p/4517#M1218</link>
      <description>&lt;P&gt;@jin park:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;You can use the Databricks Delta Lake SHOW TABLE EXTENDED command to get the size of each partition of the table. Here's an example:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;%sql
SHOW TABLE EXTENDED LIKE '&amp;lt;table_name&amp;gt;' 
PARTITION (&amp;lt;partition_column&amp;gt; = '&amp;lt;partition_value&amp;gt;') 
SELECT sizeInBytes&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Replace &amp;lt;table_name&amp;gt; with the name of your Delta table, &amp;lt;partition_column&amp;gt; with the name of your partition column, and &amp;lt;partition_value&amp;gt; with the specific partition value you want to check the size for. If you want to check the size for all partitions, omit the PARTITION clause.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;You can also use the DESCRIBE DETAIL command to get similar information:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;%sql
DESCRIBE DETAIL &amp;lt;table_name&amp;gt;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;This will show you detailed information about the table, including the size of each partition.&lt;/P&gt;</description>
      <pubDate>Sat, 13 May 2023 15:57:50 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-check-table-size-by-partition/m-p/4517#M1218</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-05-13T15:57:50Z</dc:date>
    </item>
    <item>
      <title>Re: how to check table size by partition?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-check-table-size-by-partition/m-p/4518#M1219</link>
      <description>&lt;P&gt;There is no 'sizeInBytes' item.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Sun, 14 May 2023 07:57:23 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-check-table-size-by-partition/m-p/4518#M1219</guid>
      <dc:creator>data_eng_hard</dc:creator>
      <dc:date>2023-05-14T07:57:23Z</dc:date>
    </item>
    <item>
      <title>Re: how to check table size by partition?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-check-table-size-by-partition/m-p/4519#M1220</link>
      <description>&lt;P&gt;@jin park: Please try this:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;DESCRIBE DETAIL your_table_name PARTITION (partition_column = 'partition_value')&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Replace 'your_table_name' with the actual name of your table and specify the appropriate partition_column and partition_value you want to check.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 09 Jun 2023 07:23:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-check-table-size-by-partition/m-p/4519#M1220</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-06-09T07:23:05Z</dc:date>
    </item>
    <item>
      <title>Re: how to check table size by partition?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-check-table-size-by-partition/m-p/102376#M41084</link>
      <description>&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;The previous two answers did not work for me (DBX 15.4).&lt;BR /&gt;I found a hacky way using the delta log: find the latest (group of) checkpoint (Parquet) file(s) in the delta log and use it as the source prefix `0000000000000xxxxxxx.checkpoint`:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;PRE&gt;&lt;SPAN&gt;SELECT&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;  partition_column_1,&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;  partition_column_2,&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;  round&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;sum&lt;/SPAN&gt;&lt;SPAN&gt;(size&lt;/SPAN&gt;&lt;SPAN&gt;/&lt;/SPAN&gt;&lt;SPAN&gt;1000&lt;/SPAN&gt;&lt;SPAN&gt;/&lt;/SPAN&gt;&lt;SPAN&gt;1000&lt;/SPAN&gt;&lt;SPAN&gt;/&lt;/SPAN&gt;&lt;SPAN&gt;1000&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt;&lt;SPAN&gt;2&lt;/SPAN&gt;&lt;SPAN&gt;) &lt;/SPAN&gt;&lt;SPAN&gt;AS&lt;/SPAN&gt;&lt;SPAN&gt; size_gb,&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;  count&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;*&lt;/SPAN&gt;&lt;SPAN&gt;) &lt;/SPAN&gt;&lt;SPAN&gt;AS&lt;/SPAN&gt;&lt;SPAN&gt; num_files,&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;  round&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;min&lt;/SPAN&gt;&lt;SPAN&gt;(size&lt;/SPAN&gt;&lt;SPAN&gt;/&lt;/SPAN&gt;&lt;SPAN&gt;1000&lt;/SPAN&gt;&lt;SPAN&gt;/&lt;/SPAN&gt;&lt;SPAN&gt;1000&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt;&lt;SPAN&gt;2&lt;/SPAN&gt;&lt;SPAN&gt;) &lt;/SPAN&gt;&lt;SPAN&gt;AS&lt;/SPAN&gt;&lt;SPAN&gt; min_file_size_mb,&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;  round&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;max&lt;/SPAN&gt;&lt;SPAN&gt;(size&lt;/SPAN&gt;&lt;SPAN&gt;/&lt;/SPAN&gt;&lt;SPAN&gt;1000&lt;/SPAN&gt;&lt;SPAN&gt;/&lt;/SPAN&gt;&lt;SPAN&gt;1000&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt;&lt;SPAN&gt;2&lt;/SPAN&gt;&lt;SPAN&gt;) &lt;/SPAN&gt;&lt;SPAN&gt;AS&lt;/SPAN&gt;&lt;SPAN&gt; max_file_size_mb&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;FROM&lt;/SPAN&gt;&lt;SPAN&gt; (&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;  
SELECT&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;    add&lt;/SPAN&gt;&lt;SPAN&gt;.&lt;/SPAN&gt;&lt;SPAN&gt;partitionValues&lt;/SPAN&gt;&lt;SPAN&gt;.partition_column_1,&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;    add&lt;/SPAN&gt;&lt;SPAN&gt;.&lt;/SPAN&gt;&lt;SPAN&gt;partitionValues&lt;/SPAN&gt;&lt;SPAN&gt;.partition_column_2,&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;    add&lt;/SPAN&gt;&lt;SPAN&gt;.&lt;/SPAN&gt;&lt;SPAN&gt;size&lt;/SPAN&gt; &lt;SPAN&gt;AS&lt;/SPAN&gt;&lt;SPAN&gt; size&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;  FROM&lt;/SPAN&gt;&lt;SPAN&gt; PARQUET.&lt;/SPAN&gt;&lt;SPAN&gt;`&lt;/SPAN&gt;&lt;SPAN&gt;s3://my-bucket/my_table/_delta_log/0000000000000xxxxxxx.checkpoint.*&lt;/SPAN&gt;&lt;SPAN&gt;`&lt;BR /&gt;&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;WHERE&lt;/SPAN&gt; &lt;SPAN&gt;1&lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt;1&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;AND&lt;/SPAN&gt;&lt;SPAN&gt; partition_column_1 &lt;/SPAN&gt;&lt;SPAN&gt;IS NOT NULL&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;GROUP BY&lt;/SPAN&gt; &lt;SPAN&gt;GROUPING&lt;/SPAN&gt; &lt;SPAN&gt;SETS&lt;/SPAN&gt;&lt;SPAN&gt;((), (partition_column_1, partition_column_2))&lt;/SPAN&gt;&lt;BR /&gt;&lt;SPAN&gt;ORDER BY&lt;/SPAN&gt;&lt;SPAN&gt; size_gb &lt;/SPAN&gt;&lt;SPAN&gt;DESC&lt;/SPAN&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/DIV&gt;</description>
      <pubDate>Tue, 17 Dec 2024 12:16:39 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-check-table-size-by-partition/m-p/102376#M41084</guid>
      <dc:creator>Carsten_Herbe</dc:creator>
      <dc:date>2024-12-17T12:16:39Z</dc:date>
    </item>
  </channel>
</rss>

