I am new to Databricks. Please excuse my ignorance.
My requirement is to convert the SQL query below into Databricks SQL. The query comes from EventLog table and the output of the query goes into EventSummary
These queries can be found here
CREATE TABLE EventLog
(
EventID INT
, EventName String
, EventStartDateTime timestamp
, EventEndDateTime timestamp
)
INSERT INTO EventLog(EventID, EventName, EventStartDateTime, EventEndDateTime)
VALUES(100, 'Planting', '20210620 10:34:09 AM','20211018 10:54:49 PM')
,(200, 'Foundation', '20200420 10:34:09 AM','20211018 10:54:49 PM')
,(300, 'Seeding', '20210410 10:27:19 AM','')
,(400, 'Spreading', '20220310 10:24:09 PM','');
CREATE TABLE EventSummary
(
EventID INT
, EventName VARCHAR(50) String
, [Year] INT
, [MonthName] String
, [Hours] DECIMAL
)
I have a code like so which I want to convert to Databricks SQL:
;WITH CTE AS (
SELECT EventID,EventName,EventStartDateTime,IIF(EventEndDateTime = '',GETUTCDATE(),EventEndDateTime) EventEndDateTime
FROM EventLog
UNION ALL
SELECT EventID,EventName, DATEADD(month, DATEDIFF(month, 0, DATEADD(month , 1 , EventStartDateTime)), 0) , EventEndDateTime
FROM CTE
WHERE DATEADD(month, DATEDIFF(month, 0, DATEADD(month , 1 , EventStartDateTime)), 0) <= EventEndDateTime
)
INSERT INTO EventSummary(EventID,EventName,Year,MonthName,Hours)
SELECT EventID,EventName,YEAR(EventStartDateTime),DATENAME(MONTH,EventStartDateTime),DATEDIFF(second, EventStartDateTime, n_EventStartDateTime) / 3600.0
FROM (
SELECT EventID,EventName,EventStartDateTime,LEAD(EventStartDateTime,1,EventEndDateTime) OVER(PARTITION BY EventID,EventName ORDER BY EventStartDateTime) n_EventStartDateTime
FROM CTE
) t1
option (maxrecursion 0)
This query works very well against SQL Server but I am having issues converting this to SQL Databricks
I want to get the same result but in Databricks, I get errors that CTE cannot be found because I am calling CTE inside the first CTE
I greatly appreciate your help on this.