<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Is Autoloader suitable to load full dumps? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/is-autoloader-suitable-to-load-full-dumps/m-p/55005#M30216</link>
    <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I recently completed the fundamentals &amp;amp; advanced data engineer exam, yet I've got a question about Autoloader. Please don't go too hard on me, since I lack practical experience at this point in time &lt;span class="lia-unicode-emoji" title=":winking_face:"&gt;😉&lt;/span&gt;&lt;/P&gt;&lt;P&gt;Docs say this is incremental ingestion, so it's easy to load new files that contain all-new records that go into the stream. There's also the option to allow overwriting of files. What if the files provided by a source system are&lt;/P&gt;&lt;P&gt;A) full dumps which contain ALL records currently present in the system (missing records were deleted) so the loader needs to check for new, changed or missing records&lt;/P&gt;&lt;P&gt;B) delta, only new or changed records (deletes must be flagged)&lt;/P&gt;&lt;P&gt;Is Autoloader/COPY INTO still a good fit? Perhaps using a MERGE logic?&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;P&gt;Roger&lt;/P&gt;</description>
    <pubDate>Sun, 10 Dec 2023 19:54:41 GMT</pubDate>
    <dc:creator>quakenbush</dc:creator>
    <dc:date>2023-12-10T19:54:41Z</dc:date>
    <item>
      <title>Is Autoloader suitable to load full dumps?</title>
      <link>https://community.databricks.com/t5/data-engineering/is-autoloader-suitable-to-load-full-dumps/m-p/55005#M30216</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I recently completed the fundamentals &amp;amp; advanced data engineer exam, yet I've got a question about Autoloader. Please don't go too hard on me, since I lack practical experience at this point in time &lt;span class="lia-unicode-emoji" title=":winking_face:"&gt;😉&lt;/span&gt;&lt;/P&gt;&lt;P&gt;Docs say this is incremental ingestion, so it's easy to load new files that contain all-new records that go into the stream. There's also the option to allow overwriting of files. What if the files provided by a source system are&lt;/P&gt;&lt;P&gt;A) full dumps which contain ALL records currently present in the system (missing records were deleted) so the loader needs to check for new, changed or missing records&lt;/P&gt;&lt;P&gt;B) delta, only new or changed records (deletes must be flagged)&lt;/P&gt;&lt;P&gt;Is Autoloader/COPY INTO still a good fit? Perhaps using a MERGE logic?&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;P&gt;Roger&lt;/P&gt;</description>
      <pubDate>Sun, 10 Dec 2023 19:54:41 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/is-autoloader-suitable-to-load-full-dumps/m-p/55005#M30216</guid>
      <dc:creator>quakenbush</dc:creator>
      <dc:date>2023-12-10T19:54:41Z</dc:date>
    </item>
  </channel>
</rss>

