<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Translations from T-SQL: TOP 1 OUTER APPLY or LEFT JOIN in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/translations-from-t-sql-top-1-outer-apply-or-left-join/m-p/101260#M40601</link>
    <description>&lt;P&gt;Hi &lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/135172"&gt;@MattHeidebrecht&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;Great! If this resolves your question, please consider marking it as the solution. It helps others in the community find answers more easily. &lt;span class="lia-unicode-emoji" title=":smiling_face_with_smiling_eyes:"&gt;😊&lt;/span&gt;&lt;/P&gt;</description>
    <pubDate>Fri, 06 Dec 2024 16:18:55 GMT</pubDate>
    <dc:creator>filipniziol</dc:creator>
    <dc:date>2024-12-06T16:18:55Z</dc:date>
    <item>
      <title>Translations from T-SQL: TOP 1 OUTER APPLY or LEFT JOIN</title>
      <link>https://community.databricks.com/t5/data-engineering/translations-from-t-sql-top-1-outer-apply-or-left-join/m-p/101121#M40550</link>
      <description>&lt;P&gt;Hi All,&lt;/P&gt;&lt;P&gt;I am wondering how you would go about translating either of the below to Spark SQL in Databricks.&amp;nbsp; They are more or less equivalent statements in T-SQL.&lt;/P&gt;&lt;P&gt;Please note that I am attempting to pair each unique Policy (IPI_ID) record with its highest numbered Location (IL_ID) record.&amp;nbsp; There can be many Location records for each Policy record.&amp;nbsp; The Location table links to the Policy table via Policy.IPI_ID = Location.IL_IPI_ID.&lt;/P&gt;&lt;P&gt;I have tried to utilize LIMIT 1 in certain ways (example further below) but either receive errors or the results do not match.&lt;/P&gt;&lt;P&gt;Any help or suggestions are appreciated!&lt;/P&gt;&lt;P&gt;T-SQL:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;select
	ipi.IPI_ID
	,loc.IL_ID
from Policy ipi
outer apply
	(
	select top 1 il.IL_ID
	from Location il
	where il.IL_IPI_ID = ipi.IPI_ID
	order by
		il.IL_ID desc
	) loc

--

select
	ipi.IPI_ID
	,il.IL_ID
from Policy ipi
left join Location il
	on il.IL_ID =
		(
		select top 1 il2.IL_ID
		from Location il2
		where il2.IL_IPI_ID = ipi.IPI_ID
		order by
			il2.IL_ID desc
		)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;My attempt below errors out in Databricks Spark SQL:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;select
	ipi.IPI_ID
	,il.IL_ID
from Policy ipi
left join Location il
	on il.IL_ID =
	(
	select il2.IL_ID
	from Location il2
	where il2.IL_IPI_ID = ipi.IPI_ID
	order by
		il2.IL_ID desc
  limit 1
  );&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 05 Dec 2024 18:21:12 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/translations-from-t-sql-top-1-outer-apply-or-left-join/m-p/101121#M40550</guid>
      <dc:creator>MattHeidebrecht</dc:creator>
      <dc:date>2024-12-05T18:21:12Z</dc:date>
    </item>
    <item>
      <title>Re: Translations from T-SQL: TOP 1 OUTER APPLY or LEFT JOIN</title>
      <link>https://community.databricks.com/t5/data-engineering/translations-from-t-sql-top-1-outer-apply-or-left-join/m-p/101129#M40553</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/135172"&gt;@MattHeidebrecht&lt;/a&gt;&amp;nbsp;,&lt;BR /&gt;&lt;BR /&gt;You can use the ROW_NUMBER window function to rank the rows and then filter on that rank.&lt;BR /&gt;&lt;BR /&gt;Here is the query:&lt;/P&gt;&lt;LI-CODE lang="python"&gt;WITH loc_ranked AS (
    SELECT
        IL_IPI_ID,
        IL_ID,
        ROW_NUMBER() OVER (PARTITION BY IL_IPI_ID ORDER BY IL_ID DESC) AS rn
    FROM Location
)

SELECT
    ipi.IPI_ID,
    loc_ranked.IL_ID
FROM Policy ipi
LEFT JOIN loc_ranked
    ON ipi.IPI_ID = loc_ranked.IL_IPI_ID
    AND loc_ranked.rn = 1;&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Here is an end-to-end example with sample data:&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;CREATE OR REPLACE TEMP VIEW Policy AS
SELECT * FROM VALUES
  (1),
  (2),
  (3)
AS t(IPI_ID);

CREATE OR REPLACE TEMP VIEW Location AS
SELECT * FROM VALUES
  (1, 10),
  (1, 5),
  (2, 100),
  (2, 90),
  (3, 300),
  (3, 250),
  (3, 100)
AS t(IL_IPI_ID, IL_ID);

WITH loc_ranked AS (
    SELECT
        IL_IPI_ID,
        IL_ID,
        ROW_NUMBER() OVER (PARTITION BY IL_IPI_ID ORDER BY IL_ID DESC) AS rn
    FROM Location
)
SELECT
    ipi.IPI_ID,
    loc_ranked.IL_ID
FROM Policy ipi
LEFT JOIN loc_ranked
    ON ipi.IPI_ID = loc_ranked.IL_IPI_ID
    AND loc_ranked.rn = 1;&lt;/LI-CODE&gt;&lt;P&gt;The result:&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="filipniziol_1-1733427959985.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/13353i77C8789BBB559D7D/image-size/medium?v=v2&amp;amp;px=400" role="button" title="filipniziol_1-1733427959985.png" alt="filipniziol_1-1733427959985.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 05 Dec 2024 19:46:07 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/translations-from-t-sql-top-1-outer-apply-or-left-join/m-p/101129#M40553</guid>
      <dc:creator>filipniziol</dc:creator>
      <dc:date>2024-12-05T19:46:07Z</dc:date>
    </item>
    <item>
      <title>Re: Translations from T-SQL: TOP 1 OUTER APPLY or LEFT JOIN</title>
      <link>https://community.databricks.com/t5/data-engineering/translations-from-t-sql-top-1-outer-apply-or-left-join/m-p/101251#M40596</link>
      <description>&lt;P&gt;Thanks filipniziol!&amp;nbsp; I'll start running with that when I run into cases where I need an embedded TOP 1.&lt;/P&gt;</description>
      <pubDate>Fri, 06 Dec 2024 15:22:57 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/translations-from-t-sql-top-1-outer-apply-or-left-join/m-p/101251#M40596</guid>
      <dc:creator>MattHeidebrecht</dc:creator>
      <dc:date>2024-12-06T15:22:57Z</dc:date>
    </item>
    <item>
      <title>Re: Translations from T-SQL: TOP 1 OUTER APPLY or LEFT JOIN</title>
      <link>https://community.databricks.com/t5/data-engineering/translations-from-t-sql-top-1-outer-apply-or-left-join/m-p/101260#M40601</link>
      <description>&lt;P&gt;Hi &lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/135172"&gt;@MattHeidebrecht&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;Great! If this resolves your question, please consider marking it as the solution. It helps others in the community find answers more easily. &lt;span class="lia-unicode-emoji" title=":smiling_face_with_smiling_eyes:"&gt;😊&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 06 Dec 2024 16:18:55 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/translations-from-t-sql-top-1-outer-apply-or-left-join/m-p/101260#M40601</guid>
      <dc:creator>filipniziol</dc:creator>
      <dc:date>2024-12-06T16:18:55Z</dc:date>
    </item>
  </channel>
</rss>

