<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic How Auto Loader works – file level or row level? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/how-auto-loader-works-file-level-or-row-level/m-p/126822#M47777</link>
    <description>&lt;P&gt;Does Auto Loader work on file level or row level? If it works on file level and does not process the same file again, then how can we make it pick only the new rows when data is appended to that file?&lt;/P&gt;</description>
    <pubDate>Tue, 29 Jul 2025 15:52:20 GMT</pubDate>
    <dc:creator>Akshay_Petkar</dc:creator>
    <dc:date>2025-07-29T15:52:20Z</dc:date>
    <item>
      <title>How Auto Loader works – file level or row level?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-auto-loader-works-file-level-or-row-level/m-p/126822#M47777</link>
      <description>&lt;P&gt;Does Auto Loader work on file level or row level? If it works on file level and does not process the same file again, then how can we make it pick only the new rows when data is appended to that file?&lt;/P&gt;</description>
      <pubDate>Tue, 29 Jul 2025 15:52:20 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-auto-loader-works-file-level-or-row-level/m-p/126822#M47777</guid>
      <dc:creator>Akshay_Petkar</dc:creator>
      <dc:date>2025-07-29T15:52:20Z</dc:date>
    </item>
    <item>
      <title>Re: How Auto Loader works – file level or row level?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-auto-loader-works-file-level-or-row-level/m-p/126825#M47778</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/88335"&gt;@Akshay_Petkar&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;Auto Loader works at the file level. By default, Auto Loader is configured with the following option:&lt;/P&gt;&lt;LI-CODE lang="python"&gt;cloudFiles.allowOverwrites = false&lt;/LI-CODE&gt;&lt;P&gt;So, the above option causes&amp;nbsp;&lt;SPAN&gt;files to be processed exactly once.&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;But when you switch this option to true, then&amp;nbsp; Auto Loader is guaranteed to process the latest version of the file. But keep in mind that Auto Loader will reprocess the entire file (even if there was only a partial update).&lt;BR /&gt;You can read a detailed description of this behaviour here:&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;&lt;A href="https://learn.microsoft.com/en-us/azure/databricks/ingestion/cloud-object-storage/auto-loader/faq#does-auto-loader-process-the-file-again-when-the-file-gets-appended-or-overwritten" target="_blank" rel="noopener"&gt;Auto Loader FAQ - Azure Databricks | Microsoft Learn&lt;/A&gt;&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 29 Jul 2025 16:13:31 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-auto-loader-works-file-level-or-row-level/m-p/126825#M47778</guid>
      <dc:creator>szymon_dybczak</dc:creator>
      <dc:date>2025-07-29T16:13:31Z</dc:date>
    </item>
  </channel>
</rss>

