<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How to get read/write bytes per table using Databricks system tables? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/how-to-get-read-write-bytes-per-table-using-databricks-system/m-p/145069#M52452</link>
    <description>&lt;P&gt;Actually, I was importing it from Azure Metrics and joining the folder with the location from information schema but maybe there is easier way&lt;/P&gt;</description>
    <pubDate>Fri, 23 Jan 2026 22:28:23 GMT</pubDate>
    <dc:creator>Hubert-Dudek</dc:creator>
    <dc:date>2026-01-23T22:28:23Z</dc:date>
    <item>
      <title>How to get read/write bytes per table using Databricks system tables?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-get-read-write-bytes-per-table-using-databricks-system/m-p/145048#M52448</link>
      <description>&lt;P class=""&gt;I’m working on a&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;data usage &lt;/STRONG&gt;use case and want to understand the&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;right way&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;to get&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;read bytes&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;and&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;written bytes&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;per table in Databricks, especially for Unity Catalog tables.&lt;/P&gt;&lt;H2&gt;What I want&lt;/H2&gt;&lt;P class=""&gt;For each table, something like:&lt;/P&gt;&lt;UL class=""&gt;&lt;LI&gt;&lt;P class=""&gt;Date&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P class=""&gt;Table name (catalog.schema.table)&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P class=""&gt;Operation type (read/write)&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P class=""&gt;User&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P class=""&gt;Bytes read&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P class=""&gt;Bytes written&lt;/P&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P class=""&gt;Initially, I assumed I could get this directly from the&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;UC audit logs&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;via the&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;system.access.audit&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;system table, because that’s the “audit log” source. But while exploring, I realized a few things are missing&lt;/P&gt;&lt;OL class=""&gt;&lt;LI&gt;&lt;P class=""&gt;&lt;STRONG&gt;system.access.audit&lt;/STRONG&gt;&lt;/P&gt;&lt;UL class=""&gt;&lt;LI&gt;&lt;P class=""&gt;Focused on&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;governance and admin actions&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;(e.g., grants, ownership changes, some access checks).&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P class=""&gt;Great for “who did what” from a security perspective.&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P class=""&gt;Does&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;not&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;expose per-table&lt;SPAN&gt;&amp;nbsp;&lt;STRONG&gt;read bytes&lt;/STRONG&gt;&lt;/SPAN&gt;&lt;STRONG&gt;&amp;nbsp;/ written_bytes&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;metrics.&lt;/P&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;/LI&gt;&lt;/OL&gt;&lt;P class=""&gt;&lt;STRONG&gt;Is there any direct way from&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;system.access.audit&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;to get per-table read/write bytes, or is&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;audit&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;intentionally scoped only to governance events (and not IO metrics)?&lt;/P&gt;</description>
      <pubDate>Fri, 23 Jan 2026 18:13:30 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-get-read-write-bytes-per-table-using-databricks-system/m-p/145048#M52448</guid>
      <dc:creator>Danish11052000</dc:creator>
      <dc:date>2026-01-23T18:13:30Z</dc:date>
    </item>
    <item>
      <title>Re: How to get read/write bytes per table using Databricks system tables?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-get-read-write-bytes-per-table-using-databricks-system/m-p/145069#M52452</link>
      <description>&lt;P&gt;Actually, I was importing it from Azure Metrics and joining the folder with the location from information schema but maybe there is easier way&lt;/P&gt;</description>
      <pubDate>Fri, 23 Jan 2026 22:28:23 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-get-read-write-bytes-per-table-using-databricks-system/m-p/145069#M52452</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2026-01-23T22:28:23Z</dc:date>
    </item>
    <item>
      <title>Re: How to get read/write bytes per table using Databricks system tables?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-get-read-write-bytes-per-table-using-databricks-system/m-p/145082#M52454</link>
      <description>&lt;P&gt;System Audit tables are for account activity tracking &amp;amp; security.&lt;/P&gt;&lt;P&gt;For IO details&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;You can use query history &amp;amp; lineage. Add usage attribution code to get details at table level&lt;/LI&gt;&lt;LI&gt;You can use table logs to get the write info along with other details.&lt;/LI&gt;&lt;/UL&gt;</description>
      <pubDate>Sat, 24 Jan 2026 05:28:21 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-get-read-write-bytes-per-table-using-databricks-system/m-p/145082#M52454</guid>
      <dc:creator>balajij8</dc:creator>
      <dc:date>2026-01-24T05:28:21Z</dc:date>
    </item>
    <item>
      <title>Re: How to get read/write bytes per table using Databricks system tables?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-get-read-write-bytes-per-table-using-databricks-system/m-p/145152#M52469</link>
      <description>&lt;DIV&gt;Could you please provide some scripts or samples for that? It would be really helpful to understand.&lt;/DIV&gt;</description>
      <pubDate>Sun, 25 Jan 2026 11:13:41 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-get-read-write-bytes-per-table-using-databricks-system/m-p/145152#M52469</guid>
      <dc:creator>Danish11052000</dc:creator>
      <dc:date>2026-01-25T11:13:41Z</dc:date>
    </item>
    <item>
      <title>Re: How to get read/write bytes per table using Databricks system tables?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-get-read-write-bytes-per-table-using-databricks-system/m-p/145158#M52470</link>
      <description>&lt;P&gt;Here I explained how I enabled diagnostic settings&amp;nbsp;&lt;A href="https://medium.com/@databrickster/hidden-benefit-of-databricks-managed-tables-f9ff8e1801ac" target="_blank"&gt;https://medium.com/@databrickster/hidden-benefit-of-databricks-managed-tables-f9ff8e1801ac&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Sun, 25 Jan 2026 14:43:09 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-get-read-write-bytes-per-table-using-databricks-system/m-p/145158#M52470</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2026-01-25T14:43:09Z</dc:date>
    </item>
    <item>
      <title>Re: How to get read/write bytes per table using Databricks system tables?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-get-read-write-bytes-per-table-using-databricks-system/m-p/145164#M52471</link>
      <description>&lt;P&gt;system.access.audit focuses on governance and admin/security events. It doesn’t capture per-table I/O metrics such as read_bytes or written_bytes.&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;Use system.query.history for per-statement I/O metrics (read_bytes, written_bytes, read_rows, written_rows, etc.).&lt;/LI&gt;&lt;LI&gt;Use system.access.table_lineage to identify which Unity Catalog tables were read or written. Join on statement_id where available.&amp;nbsp;&lt;BR /&gt;See if below query works for your need .&amp;nbsp;&lt;/LI&gt;&lt;/UL&gt;&lt;LI-CODE lang="python"&gt;-- Per-table read/write bytes, allocated across sources/targets per statement
WITH lineage AS (
  SELECT
    event_date,
    statement_id,
    created_by,
    source_table_full_name,
    target_table_full_name,
    source_type,
    target_type
  FROM system.access.table_lineage
  WHERE event_date BETWEEN date_sub(current_date(), 30) AND current_date()
),
qh AS (
  SELECT
    statement_id,
    executed_by,
    CAST(start_time AS DATE) AS date,
    read_bytes,
    written_bytes,
    execution_status
  FROM system.query.history
  WHERE execution_status = 'FINISHED'
)
, per_table AS (
  SELECT
    COALESCE(l.source_table_full_name, l.target_table_full_name) AS table_full_name,
    CASE
      WHEN l.source_type IS NOT NULL AND l.target_type IS NULL THEN 'read'
      WHEN l.target_type IS NOT NULL AND l.source_type IS NULL THEN 'write'
      WHEN l.source_type IS NOT NULL AND l.target_type IS NOT NULL THEN 'read_write'
      ELSE 'unknown'
    END AS operation_type,
    qh.executed_by AS user,
    qh.date AS date,
    -- allocate bytes across all sources/targets in the statement
    CASE
      WHEN l.source_type IS NOT NULL THEN
        qh.read_bytes / NULLIF(COUNT(l.source_table_full_name) OVER (PARTITION BY l.statement_id), 0)
      ELSE 0
    END AS bytes_read_alloc,
    CASE
      WHEN l.target_type IS NOT NULL THEN
        qh.written_bytes / NULLIF(COUNT(l.target_table_full_name) OVER (PARTITION BY l.statement_id), 0)
      ELSE 0
    END AS bytes_written_alloc
  FROM lineage l
  JOIN qh ON l.statement_id = qh.statement_id
)
SELECT
  date,
  table_full_name AS table,
  operation_type AS operation,
  user,
  SUM(bytes_read_alloc) AS bytes_read,
  SUM(bytes_written_alloc) AS bytes_written
FROM per_table
GROUP BY date, table_full_name, operation_type, user
ORDER BY date DESC;&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 25 Jan 2026 15:13:33 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-get-read-write-bytes-per-table-using-databricks-system/m-p/145164#M52471</guid>
      <dc:creator>pradeep_singh</dc:creator>
      <dc:date>2026-01-25T15:13:33Z</dc:date>
    </item>
  </channel>
</rss>

