<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Want to see logs for lineage view run events in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/want-to-see-logs-for-lineage-view-run-events/m-p/139670#M51264</link>
    <description>&lt;P&gt;&lt;A href="https://docs.databricks.com/aws/en/jobs/monitor#export-job-runs" target="_blank"&gt;https://docs.databricks.com/aws/en/jobs/monitor#export-job-runs &lt;/A&gt;&lt;/P&gt;&lt;P&gt;In the article look for job export&lt;/P&gt;&lt;P&gt;For the compute:&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;On the compute page, click the&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;Advanced&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;toggle.&lt;/LI&gt;&lt;LI&gt;Click the&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;Logging&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;tab.&lt;/LI&gt;&lt;LI&gt;Select a destination type.&lt;/LI&gt;&lt;LI&gt;Enter the&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;Log path&lt;/STRONG&gt;.&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;&lt;A href="https://docs.databricks.com/aws/en/compute/configure" target="_blank"&gt;https://docs.databricks.com/aws/en/compute/configure&lt;/A&gt;&lt;/P&gt;</description>
    <pubDate>Wed, 19 Nov 2025 12:30:30 GMT</pubDate>
    <dc:creator>bianca_unifeye</dc:creator>
    <dc:date>2025-11-19T12:30:30Z</dc:date>
    <item>
      <title>Want to see logs for lineage view run events</title>
      <link>https://community.databricks.com/t5/data-engineering/want-to-see-logs-for-lineage-view-run-events/m-p/139655#M51256</link>
      <description>&lt;P&gt;Hi All,&lt;/P&gt;&lt;P&gt;I need your help. My jobs are running successfully. When I click on a job, there is a lineage &amp;gt; View run events option, and when I click on it I see the steps below.&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;&lt;STRONG&gt;Job Started: The job is triggered.&lt;/STRONG&gt;&lt;/LI&gt;&lt;LI&gt;&lt;STRONG&gt;Waiting for Cluster: The job waits for the cluster to be ready.&lt;/STRONG&gt;&lt;/LI&gt;&lt;LI&gt;&lt;STRONG&gt;Cluster Ready: The cluster becomes ready to execute the job.&lt;/STRONG&gt;&lt;/LI&gt;&lt;LI&gt;&lt;STRONG&gt;Started Running: The job starts running.&lt;/STRONG&gt;&lt;/LI&gt;&lt;LI&gt;&lt;STRONG&gt;Succeeded: The job completes successfully after processing the data.&lt;/STRONG&gt;&lt;P&gt;I want to see detailed logs for all 5 of these stages. Where can I find them? I am storing the logs in a volume, where I can see the driver, eventlog, executor, etc. folders. In which folder are these stage logs stored? I have checked all the logs but was not able to find any of this information.&amp;nbsp;&lt;/P&gt;&lt;/LI&gt;&lt;/OL&gt;</description>
      <pubDate>Wed, 19 Nov 2025 11:11:04 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/want-to-see-logs-for-lineage-view-run-events/m-p/139655#M51256</guid>
      <dc:creator>jitendrajha11</dc:creator>
      <dc:date>2025-11-19T11:11:04Z</dc:date>
    </item>
    <item>
      <title>Re: Want to see logs for lineage view run events</title>
      <link>https://community.databricks.com/t5/data-engineering/want-to-see-logs-for-lineage-view-run-events/m-p/139670#M51264</link>
      <description>&lt;P&gt;&lt;A href="https://docs.databricks.com/aws/en/jobs/monitor#export-job-runs" target="_blank"&gt;https://docs.databricks.com/aws/en/jobs/monitor#export-job-runs &lt;/A&gt;&lt;/P&gt;&lt;P&gt;In the article look for job export&lt;/P&gt;&lt;P&gt;For the compute:&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;On the compute page, click the&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;Advanced&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;toggle.&lt;/LI&gt;&lt;LI&gt;Click the&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;Logging&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;tab.&lt;/LI&gt;&lt;LI&gt;Select a destination type.&lt;/LI&gt;&lt;LI&gt;Enter the&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;Log path&lt;/STRONG&gt;.&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;&lt;A href="https://docs.databricks.com/aws/en/compute/configure" target="_blank"&gt;https://docs.databricks.com/aws/en/compute/configure&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 19 Nov 2025 12:30:30 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/want-to-see-logs-for-lineage-view-run-events/m-p/139670#M51264</guid>
      <dc:creator>bianca_unifeye</dc:creator>
      <dc:date>2025-11-19T12:30:30Z</dc:date>
    </item>
    <item>
      <title>Re: Want to see logs for lineage view run events</title>
      <link>https://community.databricks.com/t5/data-engineering/want-to-see-logs-for-lineage-view-run-events/m-p/139687#M51269</link>
      <description>&lt;P&gt;The stages you mentioned—Job Started, Waiting for Cluster, Cluster Ready, Started Running, Succeeded—are Databricks job lifecycle events, not Spark events.&lt;BR /&gt;They are stored in Databricks internal job service, not in the driver/executor logs. You can access them via:&lt;/P&gt;&lt;P&gt;Jobs UI → View Run Events (what you already did)&lt;BR /&gt;Databricks REST API:&lt;BR /&gt;Use the&amp;nbsp;&lt;A href="https://docs.databricks.com/api/azure/workspace/jobs/getrun" target="_blank" rel="noopener"&gt;https://docs.databricks.com/api/azure/workspace/jobs/getrun&lt;/A&gt; to retrieve detailed lifecycle events programmatically.&lt;/P&gt;&lt;P&gt;&lt;!--  StartFragment   --&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN class=""&gt;If you want to persist these lifecycle logs:&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN class=""&gt;You need to export them via API and then write them to your volume or external storage.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN class=""&gt;The driver/event/executor logs will only show Spark-related execution details, not cluster provisioning or job trigger events.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;!--  EndFragment   --&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 19 Nov 2025 14:33:12 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/want-to-see-logs-for-lineage-view-run-events/m-p/139687#M51269</guid>
      <dc:creator>nayan_wylde</dc:creator>
      <dc:date>2025-11-19T14:33:12Z</dc:date>
    </item>
    <item>
      <title>Re: Want to see logs for lineage view run events</title>
      <link>https://community.databricks.com/t5/data-engineering/want-to-see-logs-for-lineage-view-run-events/m-p/139688#M51270</link>
      <description>&lt;P&gt;Hi Team/Member,&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;My jobs are running successfully. When I click on a job, there is a&lt;STRONG&gt; lineage &amp;gt; View run events&lt;/STRONG&gt; option, and when I click on it I see the steps below. I have also added a screenshot of it. Where can I find the logs for the stages shown in the screenshot?&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;&lt;STRONG&gt;Job Started: The job is triggered.&lt;/STRONG&gt;&lt;/LI&gt;&lt;LI&gt;&lt;STRONG&gt;Waiting for Cluster: The job waits for the cluster to be ready.&lt;/STRONG&gt;&lt;/LI&gt;&lt;LI&gt;&lt;STRONG&gt;Cluster Ready: The cluster becomes ready to execute the job.&lt;/STRONG&gt;&lt;/LI&gt;&lt;LI&gt;&lt;STRONG&gt;Started Running: The job starts running.&lt;/STRONG&gt;&lt;/LI&gt;&lt;LI&gt;&lt;STRONG&gt;Succeeded: The job completes successfully after processing the data.&lt;/STRONG&gt;&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 19 Nov 2025 14:33:20 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/want-to-see-logs-for-lineage-view-run-events/m-p/139688#M51270</guid>
      <dc:creator>jitendrajha11</dc:creator>
      <dc:date>2025-11-19T14:33:20Z</dc:date>
    </item>
    <item>
      <title>Re: Want to see logs for lineage view run events</title>
      <link>https://community.databricks.com/t5/data-engineering/want-to-see-logs-for-lineage-view-run-events/m-p/139691#M51271</link>
      <description>&lt;P&gt;In&amp;nbsp;&lt;SPAN&gt;Jobs UI → View Run Events I am not able to see anything. Please find the attachment and provide step-by-step instructions.&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 19 Nov 2025 14:49:39 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/want-to-see-logs-for-lineage-view-run-events/m-p/139691#M51271</guid>
      <dc:creator>jitendrajha11</dc:creator>
      <dc:date>2025-11-19T14:49:39Z</dc:date>
    </item>
    <item>
      <title>Re: Want to see logs for lineage view run events</title>
      <link>https://community.databricks.com/t5/data-engineering/want-to-see-logs-for-lineage-view-run-events/m-p/140808#M51537</link>
      <description>&lt;P&gt;Hi there,&lt;/P&gt;
&lt;P&gt;I vibe-coded* a query where I was able to derive most of your events from the system tables:&lt;/P&gt;
&lt;DIV&gt;
&lt;UL&gt;
&lt;LI&gt;&lt;A href="https://docs.databricks.com/aws/en/admin/system-tables/jobs#jobs" target="_blank" rel="noopener"&gt;&lt;SPAN&gt;system.lakeflow.jobs&lt;/SPAN&gt;&lt;/A&gt;&lt;/LI&gt;
&lt;LI&gt;&lt;A href="https://docs.databricks.com/aws/en/admin/system-tables/jobs#runs" target="_blank" rel="noopener"&gt;&lt;SPAN&gt;system.lakeflow.job_run_timeline&lt;/SPAN&gt;&lt;/A&gt;&lt;/LI&gt;
&lt;LI&gt;&lt;A href="https://docs.databricks.com/aws/en/admin/system-tables/jobs#task-timeline" target="_blank" rel="noopener"&gt;&lt;SPAN&gt;system.lakeflow.job_task_run_timeline&lt;/SPAN&gt;&lt;/A&gt;&lt;/LI&gt;
&lt;/UL&gt;
&lt;DIV&gt;&lt;SPAN&gt;If you have SELECT access to system tables, this could be an efficient way to gather the events. You could set up a Spark Declarative Pipeline to perform incremental refreshes to build history. Again, the phases are derived from timestamps in the system tables, rather than being clearly labeled. I did &lt;STRONG&gt;NOT&lt;/STRONG&gt; verify that they match the run events in the Job UI, so you may want to do that still:&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;
&lt;DIV&gt;
&lt;OL&gt;
&lt;LI&gt;&lt;SPAN&gt;Job Started: job_start_time&lt;/SPAN&gt;&lt;/LI&gt;
&lt;LI&gt;&lt;SPAN&gt;Waiting for Cluster: job_start_time -&amp;gt; first_task_start&lt;/SPAN&gt;&lt;/LI&gt;
&lt;LI&gt;&lt;SPAN&gt;Cluster Ready: first_task_start timestamp&lt;/SPAN&gt;&lt;/LI&gt;
&lt;LI&gt;&lt;SPAN&gt;Execution: first_task_start -&amp;gt; last_task_end&lt;/SPAN&gt;&lt;/LI&gt;
&lt;LI&gt;&lt;SPAN&gt;Result: result_state + job_end_time&lt;/SPAN&gt;&lt;/LI&gt;
&lt;/OL&gt;
&lt;LI-CODE lang="python"&gt;# Job Lifecycle Timeline Analysis
# Purpose: Derive job lifecycle phases by comparing job-level and task-level timing
# Dependencies: system.lakeflow.job_run_timeline, system.lakeflow.job_task_run_timeline, system.lakeflow.jobs
# Assumptions: Unity Catalog enabled, user has SELECT permissions on system tables

job_lifecycle_df = spark.sql("""
------------------------------------------------------------------------
-- CTE: job_runs
-- Source: system.lakeflow.job_run_timeline
-- Purpose: Extract the top-level job run records with start/end times
-- 
-- This table contains one row per job run with the overall run timing.
-- It does NOT break down into phases (waiting, running, etc.) - that's
-- why we need to join with task-level data to derive those phases.
-- 
-- Filtering to last 7 days for performance - adjust as needed.
------------------------------------------------------------------------
WITH job_runs AS (
  SELECT
    job_id,
    run_id,
    run_name,
    trigger_type,
    run_type,
    result_state,
    termination_code,
    period_start_time AS job_start_time,
    period_end_time AS job_end_time
  FROM system.lakeflow.job_run_timeline
  WHERE period_end_time &amp;gt;= CURRENT_DATE - INTERVAL 7 DAYS
),

------------------------------------------------------------------------
-- CTE: task_timing
-- Source: system.lakeflow.job_task_run_timeline
-- Purpose: Aggregate task-level timing to determine when actual work started/ended
-- 
-- The key insight: the GAP between job_start_time and the first task starting
-- represents "waiting for cluster" time. Tasks can't start until compute is ready.
-- 
-- We use MIN(period_start_time) to find when the first task began (cluster ready)
-- and MAX(period_end_time) to find when all tasks completed.
-- 
-- Join key: job_run_id in this table maps to run_id in job_run_timeline.
-- (Note: run_id in this table is the task's own run ID, not the parent job run)
------------------------------------------------------------------------
task_timing AS (
  SELECT
    job_id,
    job_run_id,
    MIN(period_start_time) AS first_task_start,
    MAX(period_end_time) AS last_task_end,
    COUNT(DISTINCT task_key) AS task_count
  FROM system.lakeflow.job_task_run_timeline
  WHERE period_end_time &amp;gt;= CURRENT_DATE - INTERVAL 7 DAYS
  GROUP BY job_id, job_run_id
),

------------------------------------------------------------------------
-- CTE: latest_jobs
-- Source: system.lakeflow.jobs
-- Purpose: Get the current job name for each job_id
-- 
-- The jobs table is versioned - every time a job config changes, a new row
-- is added with an updated change_time. Without deduplication, joining
-- directly causes row multiplication (one output row per job version).
-- 
-- ROW_NUMBER with PARTITION BY job_id ORDER BY change_time DESC assigns
-- rn=1 to the most recent version. We filter to rn=1 in the join.
------------------------------------------------------------------------
latest_jobs AS (
  SELECT
    job_id,
    name,
    ROW_NUMBER() OVER (PARTITION BY job_id ORDER BY change_time DESC) AS rn
  FROM system.lakeflow.jobs
)

------------------------------------------------------------------------
-- Main SELECT
-- Purpose: Join the three CTEs to produce the job lifecycle view
-- 
-- Lifecycle phases derived:
--   1. Job Started:           job_start_time (from job_run_timeline)
--   2. Waiting for Cluster:   job_start_time -&amp;gt; first_task_start
--   3. Cluster Ready:         first_task_start timestamp
--   4. Execution:             first_task_start -&amp;gt; last_task_end
--   5. Result:                result_state + job_end_time
-- 
-- Additional derived metrics:
--   - cleanup_duration: time between last task completing and job officially ending
--   - total_duration: end-to-end job time
-- 
-- LEFT JOINs used because:
--   - Some jobs may fail before any tasks start (no task_timing records)
--   - Some jobs may have been deleted (no latest_jobs record)
-- 
-- Filtered to run_type = 'JOB_RUN' to exclude SUBMIT_RUN and WORKFLOW_RUN
-- which have different semantics.
------------------------------------------------------------------------
SELECT
  j.job_id,
  j.run_id,
  jobs.name AS job_name,
  j.trigger_type,
  j.run_type,
  
  -- Stage 1: Job Started
  j.job_start_time,

  -- Stage 5: Result (selected early; end time, result state and termination code)
  j.job_end_time,
  j.result_state,
  j.termination_code,
  
  -- Stage 2-3: Waiting for Cluster, then Cluster Ready (gap between job start and first task start)
  t.first_task_start AS cluster_ready_time,
  ROUND(TIMESTAMPDIFF(MILLISECOND, j.job_start_time, t.first_task_start) / 1000.0, 3) AS waiting_for_cluster_sec,
  
  -- Stage 4: Execution (first task start to last task end)
  t.last_task_end AS execution_end_time,
  ROUND(TIMESTAMPDIFF(MILLISECOND, t.first_task_start, t.last_task_end) / 1000.0, 3) AS execution_duration_sec,
  
  -- Cleanup time (last task end to job end)
  ROUND(TIMESTAMPDIFF(MILLISECOND, t.last_task_end, j.job_end_time) / 1000.0, 3) AS cleanup_duration_sec,
  
  -- Total duration
  ROUND(TIMESTAMPDIFF(MILLISECOND, j.job_start_time, j.job_end_time) / 1000.0, 3) AS total_duration_sec,
  
  -- Task count for context
  t.task_count

FROM job_runs j
LEFT JOIN task_timing t
  ON j.job_id = t.job_id AND j.run_id = t.job_run_id
LEFT JOIN latest_jobs jobs
  ON j.job_id = jobs.job_id AND jobs.rn = 1
WHERE j.run_type = 'JOB_RUN'
ORDER BY j.job_start_time DESC
""")

display(job_lifecycle_df)&lt;/LI-CODE&gt;
&lt;P&gt;*Note: In accordance with our community Generative AI policy, I did personally verify the results in a Databricks workspace.&lt;/P&gt;
&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;/DIV&gt;</description>
      <pubDate>Tue, 02 Dec 2025 01:50:47 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/want-to-see-logs-for-lineage-view-run-events/m-p/140808#M51537</guid>
      <dc:creator>Commitchell</dc:creator>
      <dc:date>2025-12-02T01:50:47Z</dc:date>
    </item>
  </channel>
</rss>

