<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Autoloader start and end date for ingestion in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/autoloader-start-and-end-date-for-ingestion/m-p/45523#M27914</link>
    <description>&lt;P&gt;I have been searching for a way to set up backfilling using autoloader with an option to set a "start_date" or "end_date". I am working on ingesting a massive file system but I don't want to ingest everything from the beginning. I have a start date that I want to perform the first big ingestion to populate the most recent data into my database and then over time slowly backfill the older data. Is this functionality currently in the autoloader settings, and if not, any suggestions on how to approach this issue?&lt;/P&gt;</description>
    <pubDate>Thu, 21 Sep 2023 13:04:48 GMT</pubDate>
    <dc:creator>kmorton</dc:creator>
    <dc:date>2023-09-21T13:04:48Z</dc:date>
    <item>
      <title>Autoloader start and end date for ingestion</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-start-and-end-date-for-ingestion/m-p/45523#M27914</link>
      <description>&lt;P&gt;I have been searching for a way to set up backfilling using autoloader with an option to set a "start_date" or "end_date". I am working on ingesting a massive file system but I don't want to ingest everything from the beginning. I have a start date that I want to perform the first big ingestion to populate the most recent data into my database and then over time slowly backfill the older data. Is this functionality currently in the autoloader settings, and if not, any suggestions on how to approach this issue?&lt;/P&gt;</description>
      <pubDate>Thu, 21 Sep 2023 13:04:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-start-and-end-date-for-ingestion/m-p/45523#M27914</guid>
      <dc:creator>kmorton</dc:creator>
      <dc:date>2023-09-21T13:04:48Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader start and end date for ingestion</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-start-and-end-date-for-ingestion/m-p/101279#M40614</link>
      <description>&lt;P&gt;If the files have already been loaded by autoloader (like same name and path), this can be tricky.&lt;/P&gt;
&lt;P&gt;I recommend starting a separate autoloader stream and specifying filters on it to match your start and end dates. If you'd instead like to rely on the modification timestamps of the files, you can use the modifiedBefore and modifiedAfter options.&lt;/P&gt;</description>
      <pubDate>Fri, 06 Dec 2024 20:42:23 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-start-and-end-date-for-ingestion/m-p/101279#M40614</guid>
      <dc:creator>cgrant</dc:creator>
      <dc:date>2024-12-06T20:42:23Z</dc:date>
    </item>
  </channel>
</rss>

