<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Data deduplication in MVP Articles</title>
    <link>https://community.databricks.com/t5/mvp-articles/data-deduplication/m-p/149545#M93</link>
    <description>&lt;P&gt;At the Lakehouse, we don't enforce Primary Keys, which is why the deduplication strategy is so important. One of my favourites is using transformWithStateInPandas. Of course, it only makes sense in certain scenarios. See all five major strategies on my blog #databricks&lt;/P&gt;
&lt;P&gt;&lt;A href="https://databrickster.medium.com/deduplicating-data-on-the-databricks-lakehouse-5-ways-36a80987c716" target="_blank"&gt;https://databrickster.medium.com/deduplicating-data-on-the-databricks-lakehouse-5-ways-36a80987c716&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;&lt;A href="https://www.sunnydata.ai/blog/databricks-deduplication-strategies-lakehouse" target="_blank"&gt;https://www.sunnydata.ai/blog/databricks-deduplication-strategies-lakehouse&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="dedup1.png" style="width: 999px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/24430iF291AF2CF1BF799A/image-size/large?v=v2&amp;amp;px=999" role="button" title="dedup1.png" alt="dedup1.png" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
    <pubDate>Sat, 28 Feb 2026 13:36:58 GMT</pubDate>
    <dc:creator>Hubert-Dudek</dc:creator>
    <dc:date>2026-02-28T13:36:58Z</dc:date>
    <item>
      <title>Data deduplication</title>
      <link>https://community.databricks.com/t5/mvp-articles/data-deduplication/m-p/149545#M93</link>
      <description>&lt;P&gt;At the Lakehouse, we don't enforce Primary Keys, which is why the deduplication strategy is so important. One of my favourites is using transformWithStateInPandas. Of course, it only makes sense in certain scenarios. See all five major strategies on my blog #databricks&lt;/P&gt;
&lt;P&gt;&lt;A href="https://databrickster.medium.com/deduplicating-data-on-the-databricks-lakehouse-5-ways-36a80987c716" target="_blank"&gt;https://databrickster.medium.com/deduplicating-data-on-the-databricks-lakehouse-5-ways-36a80987c716&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;&lt;A href="https://www.sunnydata.ai/blog/databricks-deduplication-strategies-lakehouse" target="_blank"&gt;https://www.sunnydata.ai/blog/databricks-deduplication-strategies-lakehouse&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="dedup1.png" style="width: 999px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/24430iF291AF2CF1BF799A/image-size/large?v=v2&amp;amp;px=999" role="button" title="dedup1.png" alt="dedup1.png" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 28 Feb 2026 13:36:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/mvp-articles/data-deduplication/m-p/149545#M93</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2026-02-28T13:36:58Z</dc:date>
    </item>
  </channel>
</rss>

