<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic DataBricks Auto loader vs input source files deletion detection in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/databricks-auto-loader-vs-input-source-files-deletion-detection/m-p/69763#M33910</link>
    <description>&lt;P&gt;Hi,&amp;nbsp;&lt;/P&gt;&lt;P&gt;While continuously ingesting files from a source folder, I would like to be able to detect the case where files are deleted. As far as I can tell, the Autoloader cannot detect files that have been deleted from the source folder, so this case can't be supported. I want to confirm that first and, if it is indeed the case, ask what workaround approaches people use in that scenario.&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Sun, 19 May 2024 07:27:59 GMT</pubDate>
    <dc:creator>Maatari</dc:creator>
    <dc:date>2024-05-19T07:27:59Z</dc:date>
    <item>
      <title>DataBricks Auto loader vs input source files deletion detection</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-auto-loader-vs-input-source-files-deletion-detection/m-p/69763#M33910</link>
      <description>&lt;P&gt;Hi,&amp;nbsp;&lt;/P&gt;&lt;P&gt;While continuously ingesting files from a source folder, I would like to be able to detect the case where files are deleted. As far as I can tell, the Autoloader cannot detect files that have been deleted from the source folder, so this case can't be supported. I want to confirm that first and, if it is indeed the case, ask what workaround approaches people use in that scenario.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 19 May 2024 07:27:59 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-auto-loader-vs-input-source-files-deletion-detection/m-p/69763#M33910</guid>
      <dc:creator>Maatari</dc:creator>
      <dc:date>2024-05-19T07:27:59Z</dc:date>
    </item>
    <item>
      <title>Re: DataBricks Auto loader vs input source files deletion detection</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-auto-loader-vs-input-source-files-deletion-detection/m-p/69929#M33928</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/102834"&gt;@Maatari&lt;/a&gt;&amp;nbsp;&lt;SPAN&gt;Yes, it is true that Autoloader in Databricks cannot detect the deletion of files in the source folder during continuous ingestion. The Autoloader is designed to process files exactly once unless the option "cloudFiles.allowOverwrites" is enabled.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;If the files are stored in S3, we could create an alarm that fires when an S3 DeleteObject event is triggered. Do you think this solution can help in your use case?&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;Kind regards,&lt;/P&gt;
&lt;P&gt;Yesh&lt;/P&gt;</description>
      <pubDate>Mon, 20 May 2024 07:58:59 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-auto-loader-vs-input-source-files-deletion-detection/m-p/69929#M33928</guid>
      <dc:creator>Yeshwanth</dc:creator>
      <dc:date>2024-05-20T07:58:59Z</dc:date>
    </item>
  </channel>
</rss>

