<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Get total size of data in a catalog and schema in Unity Catalog in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/get-total-size-of-data-in-a-catalog-and-schema-in-unity-catalog/m-p/10246#M5469</link>
    <description>&lt;P&gt;For a KPI dashboard, we need to know the exact size of the data in a catalog and also in each schema inside that catalog. What is the best way to do this? &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;We tried iterating over all tables and summing the sizeInBytes reported by the DESCRIBE DETAIL command for each table. However, since we have a lot of tables, this takes a really long time. &lt;/P&gt;&lt;P&gt;We also tried looking in the information_schema databases of all the catalogs but couldn't find this information there. &lt;/P&gt;</description>
    <pubDate>Wed, 01 Feb 2023 16:05:29 GMT</pubDate>
    <dc:creator>apingle</dc:creator>
    <dc:date>2023-02-01T16:05:29Z</dc:date>
    <item>
      <title>Get total size of data in a catalog and schema in Unity Catalog</title>
      <link>https://community.databricks.com/t5/data-engineering/get-total-size-of-data-in-a-catalog-and-schema-in-unity-catalog/m-p/10246#M5469</link>
      <description>&lt;P&gt;For a KPI dashboard, we need to know the exact size of the data in a catalog and also in each schema inside that catalog. What is the best way to do this? &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;We tried iterating over all tables and summing the sizeInBytes reported by the DESCRIBE DETAIL command for each table. However, since we have a lot of tables, this takes a really long time. &lt;/P&gt;&lt;P&gt;We also tried looking in the information_schema databases of all the catalogs but couldn't find this information there. &lt;/P&gt;</description>
      <pubDate>Wed, 01 Feb 2023 16:05:29 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/get-total-size-of-data-in-a-catalog-and-schema-in-unity-catalog/m-p/10246#M5469</guid>
      <dc:creator>apingle</dc:creator>
      <dc:date>2023-02-01T16:05:29Z</dc:date>
    </item>
    <item>
      <title>Re: Get total size of data in a catalog and schema in Unity Catalog</title>
      <link>https://community.databricks.com/t5/data-engineering/get-total-size-of-data-in-a-catalog-and-schema-in-unity-catalog/m-p/10247#M5470</link>
      <description>&lt;P&gt;@Anant Pingle: Please try using Databricks' Metadata API. This API provides programmatic access to metadata about Databricks objects such as tables, views, and databases.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;from pyspark.sql.functions import sum
&amp;nbsp;
# Replace "my_catalog" with the name of your catalog
catalog_name = "my_catalog"
&amp;nbsp;
# Get a list of all tables in the catalog
tables = spark.catalog.listTables(catalog_name)
&amp;nbsp;
# Count the rows in each table and sum them up (this is a row count, not a size in bytes)
total_size = sum([spark.table(table.database + "." + table.name).count() for table in tables])
&amp;nbsp;
print(f"The total size of {catalog_name} is {total_size} rows.")&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Link to the API documentation: &lt;A href="https://docs.databricks.com/dev-tools/api/latest/workspace.html?_ga=2.57281449.280315152.1678105734-2075619882.1678105734" alt="https://docs.databricks.com/dev-tools/api/latest/workspace.html?_ga=2.57281449.280315152.1678105734-2075619882.1678105734" target="_blank"&gt;https://docs.databricks.com/dev-tools/api/latest/workspace.html&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 09 Mar 2023 01:56:49 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/get-total-size-of-data-in-a-catalog-and-schema-in-unity-catalog/m-p/10247#M5470</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-03-09T01:56:49Z</dc:date>
    </item>
    <item>
      <title>Re: Get total size of data in a catalog and schema in Unity Catalog</title>
      <link>https://community.databricks.com/t5/data-engineering/get-total-size-of-data-in-a-catalog-and-schema-in-unity-catalog/m-p/10248#M5471</link>
      <description>&lt;P&gt;Hi @Anant Pingle,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thank you for posting your question in our community! We are happy to assist you.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;To help us provide you with the most accurate information, could you please take a moment to review the responses and select the one that best answers your question?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;This will also help other community members who may have similar questions in the future. Thank you for your participation, and let us know if you need any further assistance!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Sun, 09 Apr 2023 02:09:23 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/get-total-size-of-data-in-a-catalog-and-schema-in-unity-catalog/m-p/10248#M5471</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-04-09T02:09:23Z</dc:date>
    </item>
  </channel>
</rss>

