Hi @bmhardy ,
As @NandiniN suggested, you can use an iterative approach to simulate recursion. I believe the Databricks roadmap also includes a plan to support recursive CTEs natively.
# Sample hierarchy: each row links RowId -> ParentId -> ChildId
# (the root has ParentId = None, the leaf has ChildId = None).
sample_data = [
    (1, None, 2),
    (2, 1, 3),
    (3, 2, 4),
    (4, 3, None),
]
columns = ["RowId", "ParentId", "ChildId"]
df = spark.createDataFrame(sample_data, columns)
df.createOrReplaceTempView("HierarchyTable")

# Parameters for the simulated recursion.
start_row_id = 1  # RowId of the root node (replace with your root)
max_levels = 100  # safety cap: stops the loop if the data contains a cycle
i = 1             # current recursion level

# Recursive anchor: the root row, labelled level 0.
df = spark.sql(f"""
    SELECT RowId
         , ParentId
         , ChildId
         , 0 AS RecursionLevel
      FROM HierarchyTable
     WHERE RowId = {start_row_id}
""")

# Register the anchor as the "frontier" for the first iteration.
df.createOrReplaceTempView("recursion_df")

# Loop to simulate a recursive CTE: each pass joins the previous frontier
# against the full table to find the next level of children.
while True:
    bill_df = spark.sql(f"""
        SELECT t.RowId
             , t.ParentId
             , t.ChildId
             , {i} AS RecursionLevel
          FROM recursion_df cte
         INNER JOIN HierarchyTable t ON t.ParentId = cte.RowId
    """)
    # Cache the frontier so the count() below and the union do not
    # recompute the ever-growing join lineage twice.
    bill_df.cache()

    # Stop when the frontier is empty (no more children) or the safety
    # cap is reached (almost certainly a cycle in the hierarchy data).
    if bill_df.count() == 0 or i >= max_levels:
        df.createOrReplaceTempView("final_df")
        break

    # This level's result becomes the next iteration's frontier.
    bill_df.createOrReplaceTempView("recursion_df")
    df = df.union(bill_df)
    i += 1  # increment recursion level

display(df)