<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic synapse pyspark delta lake merge scd type2 without primary key in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/synapse-pyspark-delta-lake-merge-scd-type2-without-primary-key/m-p/54938#M30200</link>
    <description>&lt;P&gt;Problem&lt;BR /&gt;I have a set of rows coming from a previous process that has no primary key, and the candidate composite key columns are bound to change, which makes them a poor choice for a composite key; the only way the rows are unique is by the whole row (including all keys and all values). I need to implement SCD Type 2 on this data. The environment is Synapse PySpark, using the Delta Lake MERGE command and more.&lt;/P&gt;&lt;P&gt;What I tried&lt;BR /&gt;Using a row hash: in this case, the challenge without a primary/composite key is to find which rows have been changed/updated. Whenever any value is updated, the row hash changes, resulting in a new row.&lt;/P&gt;&lt;P&gt;Please suggest how this problem can be solved. If you have any questions about this, please write back.&lt;/P&gt;</description>
    <pubDate>Fri, 08 Dec 2023 15:15:51 GMT</pubDate>
    <dc:creator>sunil_ksheersag</dc:creator>
    <dc:date>2023-12-08T15:15:51Z</dc:date>
    <item>
      <title>synapse pyspark delta lake merge scd type2 without primary key</title>
      <link>https://community.databricks.com/t5/data-engineering/synapse-pyspark-delta-lake-merge-scd-type2-without-primary-key/m-p/54938#M30200</link>
      <description>&lt;P&gt;Problem&lt;BR /&gt;I have a set of rows coming from a previous process that has no primary key, and the candidate composite key columns are bound to change, which makes them a poor choice for a composite key; the only way the rows are unique is by the whole row (including all keys and all values). I need to implement SCD Type 2 on this data. The environment is Synapse PySpark, using the Delta Lake MERGE command and more.&lt;/P&gt;&lt;P&gt;What I tried&lt;BR /&gt;Using a row hash: in this case, the challenge without a primary/composite key is to find which rows have been changed/updated. Whenever any value is updated, the row hash changes, resulting in a new row.&lt;/P&gt;&lt;P&gt;Please suggest how this problem can be solved. If you have any questions about this, please write back.&lt;/P&gt;</description>
      <pubDate>Fri, 08 Dec 2023 15:15:51 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/synapse-pyspark-delta-lake-merge-scd-type2-without-primary-key/m-p/54938#M30200</guid>
      <dc:creator>sunil_ksheersag</dc:creator>
      <dc:date>2023-12-08T15:15:51Z</dc:date>
    </item>
  </channel>
</rss>

