Hi All,
I have 22 PostgreSQL tables and I need to implement SCD Type 2 and build an Azure Databricks pipeline. However, my project team doesn't want to use the Delta table concept. Has anyone implemented this? Below is how I planned to do it:
# Read the source table from PostgreSQL over JDBC
try:
    df_src = spark.read.jdbc(url=jdbcUrl_source, table="data_table_1", properties=connectionProperties)
except Exception as e:
    print("Not able to connect to the source")
    print(f"Error is: {e}")
    raise  # bare raise keeps the original exception and traceback

# Read the current state of the target table over JDBC
try:
    data_table_1_target_df = spark.read.jdbc(url=jdbcUrl_target, table="data_table_1", properties=connectionProperties)
except Exception as e:
    print("Target table not found")
    print(f"Error is: {e}")
    raise
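One gap to close before the SQL below can run at all: spark.sql only sees registered views, so the two DataFrames have to be exposed under the names the query references (I'm assuming you keep scd_data for the source and target_table for the target):

    # Register the DataFrames under the names the MERGE statement uses
    df_src.createOrReplaceTempView("scd_data")
    data_table_1_target_df.createOrReplaceTempView("target_table")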
# Note: MERGE INTO only works when the target is a Delta table (or another
# ACID table format); it cannot update a JDBC-backed temp view, so this
# statement will fail in a no-Delta setup. Also, for a changed id it closes
# the old row but never inserts the new version, because the id still
# matches and only the WHEN MATCHED branch fires; and it closes every
# matched active row even when nothing actually changed.
merged_data_df = spark.sql("""
    MERGE INTO target_table AS t
    USING scd_data AS s
    ON t.id = s.id
    WHEN MATCHED AND t.status = TRUE THEN
      UPDATE SET
        t.effective_to_date = s.start_date,
        t.status = FALSE
    WHEN NOT MATCHED THEN
      INSERT (id, cus_name, age, effective_from_date, effective_to_date, status)
      VALUES (s.id, s.cus_name, s.age, s.start_date, s.end_date, s.is_active)
""")