df = spark.createDataFrame([(2018,'Apple1',45000),(2019,'Apple1',35000),(2020,'Apple1',75000),
(2018,'Samsung',15000),(2019,'Samsung',20000),(2020,'Samsung',25000),
(2018,'Nokia',21000),(2019,'Nokia',17000),(2020,'Nokia',14000)],
['Year', 'Brand', 'Sales'])
df.createOrReplaceTempView('PhoneBrandSales')
spark.sql('SELECT * FROM PhoneBrandSales').show()
%sql
SELECT *,
Lead(Sales, 1, 0)
OVER(PARTITION BY Brand ORDER BY Year desc) AS PreviousYearSales
FROM PhoneBrandSales;
%sql
-- https://www.sqlservertutorial.net/sql-server-window-functions/sql-server-lag-function/
WITH CTE AS
(
SELECT *,
Lead(Sales, 1, 0)
OVER(PARTITION BY Brand ORDER BY Year desc) AS PreviousYearSales
FROM PhoneBrandSales;
)
SELECT * FROM CTE
ParseException:
no viable alternative at input 'WITH CTE AS\n(\n SELECT *, \n Lead(Sales, 1, 0) \n OVER(PARTITION BY Brand ORDER BY Year desc) AS PreviousYearSales \n FROM PhoneBrandSales'(line 6, pos 22)
== SQL ==
WITH CTE AS
(
SELECT *,
Lead(Sales, 1, 0)
OVER(PARTITION BY Brand ORDER BY Year desc) AS PreviousYearSales
FROM PhoneBrandSales
----------------------^^^