<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: SQL query with leads and lags in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/sql-query-with-leads-and-lags/m-p/14278#M8798</link>
    <description>&lt;P&gt;Hi there @Eric Lohbeck​&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Does @Hubert Dudek​&amp;nbsp; response answer your question? If yes, would you be happy to mark it as best so that other members can find the solution more quickly?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;We'd love to hear from you.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Fri, 02 Sep 2022 05:05:11 GMT</pubDate>
    <dc:creator>Vidula</dc:creator>
    <dc:date>2022-09-02T05:05:11Z</dc:date>
    <item>
      <title>SQL query with leads and lags</title>
      <link>https://community.databricks.com/t5/data-engineering/sql-query-with-leads-and-lags/m-p/14275#M8795</link>
      <description>&lt;P&gt;I'm trying to create a new column that fills in the nulls below.&amp;nbsp;I tried using leads and lags but isn't turning out right.&amp;nbsp;Basically trying to figure out who is in "possession" of the record, given the TransferFrom and TransferTo columns and sequence of events. For instance, 35 was in possession of the record until they transferred it to 57. My current query will only populate 35 in record 2 since it only leads by one record.&amp;nbsp;I need the query to populate all records prior to this as well if 35 is the first value found in the TransferFrom column. Any ideas?  (btw, I also tried to use outer apply which works in SQL Server but isn't supported by spark SQL)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Create table script:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;  CREATE TABLE results (
&amp;nbsp;   OrderID int
&amp;nbsp;  ,TransferFrom string
&amp;nbsp;  ,TransferTo string
&amp;nbsp;  ,ActionTime timestamp)
&amp;nbsp;   &amp;nbsp;
  INSERT INTO results
  VALUES
&amp;nbsp;  (1,null,null,'2020-01-01 00:00:00'),
&amp;nbsp;  (1,null,null,'2020-01-02 00:00:00'),
&amp;nbsp;  (1,null,null,'2020-01-03 00:00:00'),
&amp;nbsp;  (1,'35','57','2020-01-04 00:00:00'),
&amp;nbsp;  (1,null,null,'2020-01-05 00:00:00'),
&amp;nbsp;  (1,null,null,'2020-01-06 00:00:00'),
&amp;nbsp;  (1,'57','45','2020-01-07 00:00:00'),
&amp;nbsp;  (1,null,null,'2020-01-08 00:00:00'),
&amp;nbsp;  (1,null,null,'2020-01-09 00:00:00'),
&amp;nbsp;  (1,null,null,'2020-01-10 00:00:00')&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Current query that doesn't work:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;  SELECT *
     ,coalesce(
      lead(TransferFrom) over (partition by OrderID order by ActionTime)
      ,TransferFrom
      ,lag(TransferTo) over (partition by OrderID order by ActionTime)) as NewColumn
  FROM results&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Current query result that is incorrect:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/1718i58AB6116A955A8C2/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Desired query result:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/1732i86256DCE15CC5864/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 08 Jul 2022 13:54:35 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/sql-query-with-leads-and-lags/m-p/14275#M8795</guid>
      <dc:creator>ejloh</dc:creator>
      <dc:date>2022-07-08T13:54:35Z</dc:date>
    </item>
    <item>
      <title>Re: SQL query with leads and lags</title>
      <link>https://community.databricks.com/t5/data-engineering/sql-query-with-leads-and-lags/m-p/14276#M8796</link>
      <description>&lt;P&gt;This is likely sub-optimal for spark SQL, but it got me the right answer.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;with cte1 as (
    select *
    from results
)
, cte2 as (
    select *
    from cte1
    where TransferFrom is not null
    or TransferTo is not null
    order by ActionTime
)
, cte3 as (
    select distinct OrderID, TransferFrom as Team from cte2
    union
    select distinct OrderID, TransferTo as Team from cte2
)
, cte4 as (
    select a.*
    ,ifnull(timestampadd(MICROSECOND, 1, c.ActionTime),'2000-01-01T00:00:00.000+0000') as StartTime
    ,ifnull(b.ActionTime,'9999-12-31T00:00:00.000+0000') as EndTime
    from cte3 as a
    left join cte2 as b
      on a.OrderID = b.OrderID
      and a.Team = b.TransferFrom
    left join cte2 as c
      on a.OrderID = c.OrderID
      and a.Team = c.TransferTo
    order by OrderID, StartTime
)
, cte5 as (
    select a.*, b.Team
    from cte1 as a
    join cte4 as b
      on a.OrderID = b.OrderID
      and a.ActionTime between b.StartTime and b.EndTime
)
&amp;nbsp;
select * 
from cte5
order by 1, 4&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 08 Jul 2022 15:28:27 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/sql-query-with-leads-and-lags/m-p/14276#M8796</guid>
      <dc:creator>ejloh</dc:creator>
      <dc:date>2022-07-08T15:28:27Z</dc:date>
    </item>
    <item>
      <title>Re: SQL query with leads and lags</title>
      <link>https://community.databricks.com/t5/data-engineering/sql-query-with-leads-and-lags/m-p/14277#M8797</link>
      <description>&lt;P&gt;Please just replace OVER with&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;IGNORE NULLS OVER&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 08 Jul 2022 17:18:16 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/sql-query-with-leads-and-lags/m-p/14277#M8797</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2022-07-08T17:18:16Z</dc:date>
    </item>
    <item>
      <title>Re: SQL query with leads and lags</title>
      <link>https://community.databricks.com/t5/data-engineering/sql-query-with-leads-and-lags/m-p/14278#M8798</link>
      <description>&lt;P&gt;Hi there @Eric Lohbeck​&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Does @Hubert Dudek​&amp;nbsp; response answer your question? If yes, would you be happy to mark it as best so that other members can find the solution more quickly?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;We'd love to hear from you.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 02 Sep 2022 05:05:11 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/sql-query-with-leads-and-lags/m-p/14278#M8798</guid>
      <dc:creator>Vidula</dc:creator>
      <dc:date>2022-09-02T05:05:11Z</dc:date>
    </item>
  </channel>
</rss>

