# Install the Databricks SDK into this notebook's environment, then restart
# the Python interpreter so the freshly installed package becomes importable.
%pip install databricks-sdk
dbutils.library.restartPython()
# ID of the SQL warehouse whose query history is inspected below.
warehouse_id = "bc1af43449227761"
# Lookback window, in hours, for the query-history search.
hours_back_to_check = 2
from databricks.sdk import WorkspaceClient
from databricks.sdk.service import sql
from pyspark.sql.types import StructType, StructField, StringType, LongType
import time
# Workspace client; called with no arguments, so it relies on the notebook's
# ambient authentication context (NOTE(review): confirm default auth is intended).
w = WorkspaceClient()
# All timestamps are epoch milliseconds, as expected by the Query History API.
current_time_ms = int(round(time.time() * 1000))
lookback_ms = 3600000 * hours_back_to_check
start_time = current_time_ms - lookback_ms
# Restrict results to the target warehouse within the lookback window.
# Filtering options:
# https://databricks-sdk-py.readthedocs.io/en/latest/dbdataclasses/sql.html#databricks.sdk.service.sql.QueryFilter
query_filter = sql.QueryFilter(
    warehouse_ids=[warehouse_id],
    query_start_time_range=sql.TimeRange(
        start_time_ms=start_time,
        end_time_ms=current_time_ms,
    ),
)
query_ls = [query for query in w.query_history.list(filter_by=query_filter, include_metrics=True)]
# Flat schema for the subset of query-history fields surfaced below.
# All columns are nullable.
_columns = [
    ("duration", LongType()),
    ("query_start_time_ms", LongType()),
    ("query_end_time_ms", LongType()),
    ("executed_as_user_name", StringType()),
    ("query_text", StringType()),
]
schema = StructType([StructField(name, dtype, True) for name, dtype in _columns])
# Project each query-history record onto a plain tuple in the same column
# order as `schema`, build a Spark DataFrame from it, render the table, and
# finally dump the raw per-query metrics for inspection.
rows = [
    (
        q.duration,
        q.query_start_time_ms,
        q.query_end_time_ms,
        q.executed_as_user_name,
        q.query_text,
    )
    for q in query_ls
]
df = spark.createDataFrame(rows, schema)
df.display()
print([q.metrics for q in query_ls])