<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Autoloader: Read old version of file. Read modification time is X, latest modification time is X in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/autoloader-read-old-version-of-file-read-modification-time-is-x/m-p/67359#M33328</link>
    <description>&lt;P&gt;&lt;SPAN&gt;I'm receiving this error from autoloader. It seems to be stuck on this one file. I don't care when it was read and last modified, I just want to ingest it. Any ideas?&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;java.io.IOException: Read old version of file s3a://&amp;lt;file-path&amp;gt;.json. Read modification time is 1713910814000, latest modification time is 1713925112000&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;at com.databricks.sql.io.StalenessChecker$Impl.check(StalenessChecker.java:223) at&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;com.databricks.photon.NativeIOBroker.lambda$new$0(NativeIOBroker.java:374)&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;at org.apache.spark.TaskContextImpl.$anonfun$invokeTaskCompletionListeners$1(TaskContextImpl.scala:173)&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;at org.apache.spark.TaskContextImpl.$anonfun$invokeTaskCompletionListeners$1$adapted(TaskContextImpl.scala:173)&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;at org.apache.spark.TaskContextImpl.invokeListeners(TaskContextImpl.scala:228)&lt;/SPAN&gt;&lt;/P&gt;</description>
    <pubDate>Fri, 26 Apr 2024 08:28:05 GMT</pubDate>
    <dc:creator>stevenayers-bge</dc:creator>
    <dc:date>2024-04-26T08:28:05Z</dc:date>
    <item>
      <title>Autoloader: Read old version of file. Read modification time is X, latest modification time is X</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-read-old-version-of-file-read-modification-time-is-x/m-p/67359#M33328</link>
      <description>&lt;P&gt;&lt;SPAN&gt;I'm receiving this error from autoloader. It seems to be stuck on this one file. I don't care when it was read and last modified, I just want to ingest it. Any ideas?&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;java.io.IOException: Read old version of file s3a://&amp;lt;file-path&amp;gt;.json. Read modification time is 1713910814000, latest modification time is 1713925112000&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;at com.databricks.sql.io.StalenessChecker$Impl.check(StalenessChecker.java:223) at&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;com.databricks.photon.NativeIOBroker.lambda$new$0(NativeIOBroker.java:374)&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;at org.apache.spark.TaskContextImpl.$anonfun$invokeTaskCompletionListeners$1(TaskContextImpl.scala:173)&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;at org.apache.spark.TaskContextImpl.$anonfun$invokeTaskCompletionListeners$1$adapted(TaskContextImpl.scala:173)&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;at org.apache.spark.TaskContextImpl.invokeListeners(TaskContextImpl.scala:228)&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 26 Apr 2024 08:28:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-read-old-version-of-file-read-modification-time-is-x/m-p/67359#M33328</guid>
      <dc:creator>stevenayers-bge</dc:creator>
      <dc:date>2024-04-26T08:28:05Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader: Read old version of file. Read modification time is X, latest modification time is X</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-read-old-version-of-file-read-modification-time-is-x/m-p/100934#M40480</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/103678"&gt;@stevenayers-bge&lt;/a&gt;&amp;nbsp;Autoloader is designed to work best with immutable files. If files are mutable (i.e., they can be updated), it is recommended to set &lt;CODE&gt;cloudFiles.allowOverwrites = true&lt;/CODE&gt; to ensure that the latest version of the file is read.&lt;/P&gt;
&lt;P&gt;Please refer to the documentation below for more details:&lt;/P&gt;
&lt;P&gt;&lt;A href="https://docs.databricks.com/en/ingestion/cloud-object-storage/auto-loader/faq.html#does-auto-loader-process-the-file-again-when-the-file-gets-appended-or-overwritten" target="_blank"&gt;https://docs.databricks.com/en/ingestion/cloud-object-storage/auto-loader/faq.html#does-auto-loader-process-the-file-again-when-the-file-gets-appended-or-overwritten&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 04 Dec 2024 14:47:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-read-old-version-of-file-read-modification-time-is-x/m-p/100934#M40480</guid>
      <dc:creator>PotnuruSiva</dc:creator>
      <dc:date>2024-12-04T14:47:42Z</dc:date>
    </item>
  </channel>
</rss>

