mtajmouati
Contributor

True  ! try this :

import requests
import json

# Define your Databricks workspace URL and API token
databricks_instance = "https://<your-databricks-instance>"
api_token = "dapi<your-api-token>"

# Fetch SQL query history
def get_query_history():
    url = f"{databricks_instance}/api/2.0/sql/history/queries"
    headers = {
        "Authorization": f"Bearer {api_token}"
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    return response.json()

# Retrieve query history
query_history = get_query_history()

# Print retrieved query history for inspection
print(json.dumps(query_history, indent=4))

 

def extract_query_metadata(query_history):
    for query in query_history.get("res", []):
        query_id = query.get("query_id")
        query_text = query.get("query_text")
        user_name = query.get("user_name")
        # Fetch metadata from your Databricks environment or catalog
        # This is a placeholder function
        catalog, schema = fetch_catalog_and_schema(query_text, user_name)
        print(f"Query ID: {query_id}")
        print(f"Query Text: {query_text}")
        print(f"Catalog: {catalog}")
        print(f"Schema: {schema}")
        print("---")

# Placeholder function to fetch catalog and schema based on query text and user context
def fetch_catalog_and_schema(query_text, user_name):
    # Implement logic to fetch catalog and schema
    # For example, parse the query text to identify table names
    # and cross-reference with known metadata
    # Return default values if necessary
    catalog = "default_catalog"
    schema = "default_schema"
    return catalog, schema

# Extract and print metadata
extract_query_metadata(query_history)

 

Best regards,
Mehdi Tajmouati
 mehdi.tajmouati@wytasoft.com
 06 68 23 18 42
 www.wytasoft.com