<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: numSourceRows greater than expected in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/numsourcerows-greater-than-expected/m-p/9753#M5068</link>
    <description>&lt;P&gt;That is indeed interesting, never looked into it.&lt;/P&gt;&lt;P&gt;I just searched on the delta github space and found some commits that show there is a bit more to it than just a count:&lt;/P&gt;&lt;P&gt;&lt;A href="https://github.com/delta-io/delta/commit/d2804cb92a7e36863144c7be9c55df1c6f1c1a1e" target="_blank"&gt;https://github.com/delta-io/delta/commit/d2804cb92a7e36863144c7be9c55df1c6f1c1a1e&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://github.com/delta-io/delta/commit/8624b92ddd8d47f98e91b88b19b6d4af2e09033b" target="_blank"&gt;https://github.com/delta-io/delta/commit/8624b92ddd8d47f98e91b88b19b6d4af2e09033b&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Wed, 08 Feb 2023 12:18:10 GMT</pubDate>
    <dc:creator>-werners-</dc:creator>
    <dc:date>2023-02-08T12:18:10Z</dc:date>
    <item>
      <title>numSourceRows greater than expected</title>
      <link>https://community.databricks.com/t5/data-engineering/numsourcerows-greater-than-expected/m-p/9752#M5067</link>
      <description>&lt;P&gt;Hey&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I am doing an upsert of a source DataFrame into a target table. Before said upsert, I print out the source DataFrame's row count, which is a bit smaller than what `numSourceRows` says after the operation completes and I check the operationMetrics. Two things occurred to me as to why this is happening:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;The matching condition is being matched more than once (it's not, I checked. And it wouldn't make sense that this affects the field, according to what the Doc says)&lt;/LI&gt;&lt;LI&gt;Some rows are being modified because they are written in the same page as the other truly modified rows (still doesn't make sense).&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;What situations might cause this?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 08 Feb 2023 10:51:54 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/numsourcerows-greater-than-expected/m-p/9752#M5067</guid>
      <dc:creator>rbricks</dc:creator>
      <dc:date>2023-02-08T10:51:54Z</dc:date>
    </item>
    <item>
      <title>Re: numSourceRows greater than expected</title>
      <link>https://community.databricks.com/t5/data-engineering/numsourcerows-greater-than-expected/m-p/9753#M5068</link>
      <description>&lt;P&gt;That is indeed interesting, never looked into it.&lt;/P&gt;&lt;P&gt;I just searched on the delta github space and found some commits that show there is a bit more to it than just a count:&lt;/P&gt;&lt;P&gt;&lt;A href="https://github.com/delta-io/delta/commit/d2804cb92a7e36863144c7be9c55df1c6f1c1a1e" target="_blank"&gt;https://github.com/delta-io/delta/commit/d2804cb92a7e36863144c7be9c55df1c6f1c1a1e&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://github.com/delta-io/delta/commit/8624b92ddd8d47f98e91b88b19b6d4af2e09033b" target="_blank"&gt;https://github.com/delta-io/delta/commit/8624b92ddd8d47f98e91b88b19b6d4af2e09033b&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 08 Feb 2023 12:18:10 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/numsourcerows-greater-than-expected/m-p/9753#M5068</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2023-02-08T12:18:10Z</dc:date>
    </item>
    <item>
      <title>Re: numSourceRows greater than expected</title>
      <link>https://community.databricks.com/t5/data-engineering/numsourcerows-greater-than-expected/m-p/9754#M5069</link>
      <description>&lt;P&gt;Could you share your code snippet, please? Also share the expected output.&lt;/P&gt;</description>
      <pubDate>Wed, 01 Mar 2023 18:54:22 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/numsourcerows-greater-than-expected/m-p/9754#M5069</guid>
      <dc:creator>jose_gonzalez</dc:creator>
      <dc:date>2023-03-01T18:54:22Z</dc:date>
    </item>
  </channel>
</rss>

