<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Direct access to Databricks query history database in Data Governance</title>
    <link>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/62015#M1704</link>
    <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;I would like to know if there is direct access to the Databricks query history tables. For compliance issues, I would like to be able to create reports&amp;nbsp; for something like: who has accessed a particular column in a table in the past 6 months. The query history&amp;nbsp;web interface is quite limited.&amp;nbsp; I would ideally like wo use SQL to query the history table. Is this possible?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Mon, 26 Feb 2024 18:14:40 GMT</pubDate>
    <dc:creator>WWoman</dc:creator>
    <dc:date>2024-02-26T18:14:40Z</dc:date>
    <item>
      <title>Direct access to Databricks query history database</title>
      <link>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/62015#M1704</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;I would like to know if there is direct access to the Databricks query history tables. For compliance issues, I would like to be able to create reports&amp;nbsp; for something like: who has accessed a particular column in a table in the past 6 months. The query history&amp;nbsp;web interface is quite limited.&amp;nbsp; I would ideally like wo use SQL to query the history table. Is this possible?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 26 Feb 2024 18:14:40 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/62015#M1704</guid>
      <dc:creator>WWoman</dc:creator>
      <dc:date>2024-02-26T18:14:40Z</dc:date>
    </item>
    <item>
      <title>Re: Direct access to Databricks query history database</title>
      <link>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/63833#M1725</link>
      <description>&lt;P&gt;I would like to be able to query the query history tables by running my own queries. I do not want to use the Query History interface supplied by Databricks;&amp;nbsp; I want to be able to create python scripts that access the underling tables/views for TAC and Query history.&amp;nbsp; &amp;nbsp;From your response, it seems like this is not possible. Can you confirm that?&lt;/P&gt;</description>
      <pubDate>Fri, 15 Mar 2024 14:47:06 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/63833#M1725</guid>
      <dc:creator>WWoman</dc:creator>
      <dc:date>2024-03-15T14:47:06Z</dc:date>
    </item>
    <item>
      <title>Re: Direct access to Databricks query history database</title>
      <link>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/66787#M1775</link>
      <description>&lt;LI-CODE lang="markup"&gt;%pip install databricks-sdk
dbutils.library.restartPython()&lt;/LI-CODE&gt;&lt;LI-CODE lang="markup"&gt;warehouse_id = "bc1af43449227761"
hours_back_to_check = 2&lt;/LI-CODE&gt;&lt;LI-CODE lang="markup"&gt;from databricks.sdk import WorkspaceClient
from databricks.sdk.service import sql
from pyspark.sql.types import StructType, StructField, StringType, LongType
import time

w = WorkspaceClient()
current_time_ms = int(round(time.time() * 1000))
start_time = current_time_ms - (3600000 * hours_back_to_check)&lt;/LI-CODE&gt;&lt;LI-CODE lang="markup"&gt;# Filtering options:
# https://databricks-sdk-py.readthedocs.io/en/latest/dbdataclasses/sql.html#databricks.sdk.service.sql.QueryFilter
query_filter = sql.QueryFilter(
    query_start_time_range=sql.TimeRange(start_time_ms=start_time, end_time_ms=current_time_ms),
    warehouse_ids=[warehouse_id]
)
query_ls = [query for query in w.query_history.list(filter_by=query_filter, include_metrics=True)]&lt;/LI-CODE&gt;&lt;LI-CODE lang="markup"&gt;schema = StructType([
    StructField("duration", LongType(), True),
    StructField("query_start_time_ms", LongType(), True),
    StructField("query_end_time_ms", LongType(), True),
    StructField("executed_as_user_name", StringType(), True),
    StructField("query_text", StringType(), True),
])

# Extract the relevant fields from the BaseRun objects
df = spark.createDataFrame([
    (query.duration, query.query_start_time_ms, query.query_end_time_ms, query.executed_as_user_name, query.query_text) 
    for query in query_ls
    ], schema)
df.display()&lt;/LI-CODE&gt;&lt;LI-CODE lang="markup"&gt;print([query.metrics for query in query_ls])&lt;/LI-CODE&gt;</description>
      <pubDate>Fri, 19 Apr 2024 18:45:12 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/66787#M1775</guid>
      <dc:creator>josh_melton</dc:creator>
      <dc:date>2024-04-19T18:45:12Z</dc:date>
    </item>
    <item>
      <title>Re: Direct access to Databricks query history database</title>
      <link>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/66788#M1776</link>
      <description>&lt;P&gt;This provides query history. You may also be interested in System Tables - for compliance purposes check out Audit Logs:&lt;/P&gt;
&lt;P&gt;&lt;A href="https://docs.databricks.com/en/administration-guide/system-tables/audit-logs.html" target="_blank"&gt;https://docs.databricks.com/en/administration-guide/system-tables/audit-logs.html&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 19 Apr 2024 18:47:19 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/66788#M1776</guid>
      <dc:creator>josh_melton</dc:creator>
      <dc:date>2024-04-19T18:47:19Z</dc:date>
    </item>
    <item>
      <title>Re: Direct access to Databricks query history database</title>
      <link>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/68997#M1809</link>
      <description>&lt;P&gt;For posterity, there is a query history system table which contains all of this information which is in preview at the time of me writing this. If you're reading this later than May 2024, please check the documentation for the query metrics table.&lt;/P&gt;</description>
      <pubDate>Tue, 14 May 2024 14:06:31 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/68997#M1809</guid>
      <dc:creator>josh_melton</dc:creator>
      <dc:date>2024-05-14T14:06:31Z</dc:date>
    </item>
    <item>
      <title>Re: Direct access to Databricks query history database</title>
      <link>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/69062#M1810</link>
      <description>&lt;P&gt;Thanks&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/60794"&gt;@josh_melton&lt;/a&gt;&amp;nbsp;! I was wondering right now about this (one day after your post!) since I only found the UI and API in the documentation and was really puzzled that there is no equivalent in unity to the Snowflake query_history table.&lt;/P&gt;</description>
      <pubDate>Wed, 15 May 2024 08:07:10 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/69062#M1810</guid>
      <dc:creator>ReiskaS</dc:creator>
      <dc:date>2024-05-15T08:07:10Z</dc:date>
    </item>
    <item>
      <title>Re: Direct access to Databricks query history database</title>
      <link>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/69106#M1812</link>
      <description>&lt;P&gt;A quick question on this... (First of all thanks so much for the sample code!). I'm playing around with this and I would like to get the statement_type and status. I see that duration, query_start_time_ms and query_end_time_ms are int data type and defines as LongType(); executed_as_user_name and query_text are str datatype and defined as StringType. statement_type and status are listed as data types QueryStatementType and QueryStatus respectively. How would I define the StructType for these fields?&lt;/P&gt;</description>
      <pubDate>Wed, 15 May 2024 20:05:57 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/69106#M1812</guid>
      <dc:creator>WWoman</dc:creator>
      <dc:date>2024-05-15T20:05:57Z</dc:date>
    </item>
    <item>
      <title>Re: Direct access to Databricks query history database</title>
      <link>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/76954#M1937</link>
      <description>&lt;P&gt;Sorry, missed this - try the system table and query it in SQL instead, much simpler than defining StructTypes!&lt;/P&gt;</description>
      <pubDate>Fri, 05 Jul 2024 23:19:36 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/76954#M1937</guid>
      <dc:creator>josh_melton</dc:creator>
      <dc:date>2024-07-05T23:19:36Z</dc:date>
    </item>
    <item>
      <title>Re: Direct access to Databricks query history database</title>
      <link>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/83478#M2017</link>
      <description>&lt;P&gt;The problem with that is that I do not have access to the raw system tables... I am writing a python script to load data into a table, so that we can run queries against it&lt;/P&gt;</description>
      <pubDate>Mon, 19 Aug 2024 16:05:20 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/83478#M2017</guid>
      <dc:creator>WWoman</dc:creator>
      <dc:date>2024-08-19T16:05:20Z</dc:date>
    </item>
    <item>
      <title>Re: Direct access to Databricks query history database</title>
      <link>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/108808#M2400</link>
      <description>&lt;P&gt;Throwing it out there that my team is also looking for a way to easily do this and I don't think the above solutions really fit our need. We are currently in the process of migrating schemas (from one name to a better name, we didn't rename because we didn't want to break processes for us or anyone else) so we have two sets of tables with the same name. In each schema set there are well over a hundred tables so going through each individually and checking if anyone is still hitting the old tables is a tedious process however none of these options give us a way to loop through them and see what is going on:&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;describe history [table name] only shows DDL/DML history and doesn't show DQL (like a user/process still selecting data from the table, that's actually important to us)&lt;/LI&gt;&lt;LI&gt;&lt;SPAN&gt;system.&lt;/SPAN&gt;&lt;SPAN&gt;query&lt;/SPAN&gt;&lt;SPAN&gt;.&lt;/SPAN&gt;&lt;SPAN&gt;history has DQL history, but in our case where we have two tables with the same name, if the user doesn't identify the catalog or schema in the query text there isn't a way to distinguish the old table from the new table or tables between environments (we use different catalogs for different envs). This happens when you set the schema before running with 'use schema', unfortunately my team has a tendency to do this with 'use catalog' to enforce an environment at the start of a process so it is quite common for the schema not to be listed&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN&gt;&lt;SPAN&gt;Getting&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;request_params.commandText from system.&lt;SPAN&gt;access&lt;/SPAN&gt;&lt;SPAN&gt;.audit suffers from similar issues as above.&amp;nbsp;&lt;/SPAN&gt;&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;&lt;SPAN&gt;So I guess what we are looking for is something like `describe history [table name]` but that has the DQL history this way we can more easily run a script to determine if a table is still in use.&lt;/SPAN&gt;&lt;/P&gt;&lt;DIV&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/DIV&gt;</description>
      <pubDate>Tue, 04 Feb 2025 15:26:10 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/108808#M2400</guid>
      <dc:creator>kenmyers-8451</dc:creator>
      <dc:date>2025-02-04T15:26:10Z</dc:date>
    </item>
    <item>
      <title>Re: Direct access to Databricks query history database</title>
      <link>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/108820#M2401</link>
      <description>&lt;P&gt;If you are trying to see access on a certain table query_history is a bad way to do this, parsing the SQL statement is prone to many errors. For example, if the current catalog and schema are set the query may look like "select * from my_view", where the view is accessing the table you are interested in, so from the SQL you will not be able to determine the catalog or schema. If someone creates a view onto the tables you are interested in you may not see the schema or table name in the SQL at all.&lt;/P&gt;&lt;P&gt;The best way to determine this is to use the lineage tables. (&lt;A href="https://learn.microsoft.com/en-us/azure/databricks/admin/system-tables/lineage" target="_blank"&gt;https://learn.microsoft.com/en-us/azure/databricks/admin/system-tables/lineage&lt;/A&gt;). These table track (among other things) access to metadata and data for the objects (tables and views).&lt;/P&gt;&lt;P&gt;To find access for a specific table from 2024-12-01 to 2024-12-31 the query would be something like:&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;SELECT&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;CASE&lt;/SPAN&gt; &lt;SPAN&gt;WHEN&lt;/SPAN&gt;&lt;SPAN&gt; target_type &lt;/SPAN&gt;&lt;SPAN&gt;is&lt;/SPAN&gt; &lt;SPAN&gt;null&lt;/SPAN&gt; &lt;SPAN&gt;and&lt;/SPAN&gt;&lt;SPAN&gt; source_type &lt;/SPAN&gt;&lt;SPAN&gt;is&lt;/SPAN&gt; &lt;SPAN&gt;not&lt;/SPAN&gt; &lt;SPAN&gt;null&lt;/SPAN&gt; &lt;SPAN&gt;THEN&lt;/SPAN&gt; &lt;SPAN&gt;'read'&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;WHEN&lt;/SPAN&gt;&lt;SPAN&gt; target_type &lt;/SPAN&gt;&lt;SPAN&gt;is&lt;/SPAN&gt; &lt;SPAN&gt;not&lt;/SPAN&gt; &lt;SPAN&gt;null&lt;/SPAN&gt; &lt;SPAN&gt;and&lt;/SPAN&gt;&lt;SPAN&gt; source_type &lt;/SPAN&gt;&lt;SPAN&gt;is&lt;/SPAN&gt; &lt;SPAN&gt;null&lt;/SPAN&gt; &lt;SPAN&gt;THEN&lt;/SPAN&gt; &lt;SPAN&gt;'write'&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;WHEN&lt;/SPAN&gt;&lt;SPAN&gt; target_type &lt;/SPAN&gt;&lt;SPAN&gt;is&lt;/SPAN&gt; &lt;SPAN&gt;not&lt;/SPAN&gt; &lt;SPAN&gt;null&lt;/SPAN&gt; &lt;SPAN&gt;and&lt;/SPAN&gt;&lt;SPAN&gt; source_type &lt;/SPAN&gt;&lt;SPAN&gt;is&lt;/SPAN&gt; &lt;SPAN&gt;not&lt;/SPAN&gt; &lt;SPAN&gt;null&lt;/SPAN&gt; &lt;SPAN&gt;THEN&lt;/SPAN&gt; &lt;SPAN&gt;'read and write'&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;ELSE&lt;/SPAN&gt; &lt;SPAN&gt;'unknown'&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;END&lt;/SPAN&gt; &lt;SPAN&gt;as&lt;/SPAN&gt;&lt;SPAN&gt; access_type,&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;target_table_full_name, source_type, target_type, created_by, tl.event_time&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;FROM&lt;/SPAN&gt;&lt;SPAN&gt;&amp;nbsp;system.&lt;/SPAN&gt;&lt;SPAN&gt;access&lt;/SPAN&gt;&lt;SPAN&gt;.table_lineage tl&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;WHERE&lt;/SPAN&gt; &lt;SPAN&gt;lower(&lt;/SPAN&gt;&lt;SPAN&gt;source_table_full_name) &lt;/SPAN&gt;&lt;SPAN&gt;like&lt;/SPAN&gt; &lt;SPAN&gt;'my_catalog.my_schema.my_table'&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;and&lt;/SPAN&gt;&lt;SPAN&gt; tl.event_date &lt;/SPAN&gt;&lt;SPAN&gt;&amp;gt;=&lt;/SPAN&gt; &lt;SPAN&gt;'2024-12-01'&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;and&lt;/SPAN&gt;&lt;SPAN&gt; tl.event_date &lt;/SPAN&gt;&lt;SPAN&gt;&amp;lt;-&lt;/SPAN&gt; &lt;SPAN&gt;'2024-12-31'&lt;/SPAN&gt;&lt;SPAN&gt; ;&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Tue, 04 Feb 2025 16:23:15 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/108820#M2401</guid>
      <dc:creator>WWoman</dc:creator>
      <dc:date>2025-02-04T16:23:15Z</dc:date>
    </item>
    <item>
      <title>Re: Direct access to Databricks query history database</title>
      <link>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/108823#M2402</link>
      <description>&lt;P&gt;Thanks this is really helpful. I didn't check&amp;nbsp;&lt;SPAN&gt;table_lineage&lt;/SPAN&gt;, I thought would be more strictly metadata related about relationships between tables and wouldn't include simple reads from a table.&lt;/P&gt;</description>
      <pubDate>Tue, 04 Feb 2025 16:42:27 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-governance/direct-access-to-databricks-query-history-database/m-p/108823#M2402</guid>
      <dc:creator>kenmyers-8451</dc:creator>
      <dc:date>2025-02-04T16:42:27Z</dc:date>
    </item>
  </channel>
</rss>

