<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Auto loader in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/auto-loader/m-p/62560#M31999</link>
    <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/100565"&gt;@BhaveshPatel&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Three things that you can do:&lt;/P&gt;&lt;P&gt;- Move the files to the separate folder,&lt;BR /&gt;- Use a filter on metadata fields to filter out the unnecessary files,&lt;BR /&gt;- Use a&amp;nbsp;&lt;A href="https://spark.apache.org/docs/latest/sql-data-sources-generic-options.html#path-glob-filter" target="_self"&gt;pathGlobFilter&lt;/A&gt;&amp;nbsp;to filter only on the files you need&lt;/P&gt;</description>
    <pubDate>Mon, 04 Mar 2024 08:59:11 GMT</pubDate>
    <dc:creator>daniel_sahal</dc:creator>
    <dc:date>2024-03-04T08:59:11Z</dc:date>
    <item>
      <title>Auto loader</title>
      <link>https://community.databricks.com/t5/data-engineering/auto-loader/m-p/62285#M31941</link>
      <description>&lt;P&gt;Suppose I have 1000's of historical .csv files stored from Jan, 2022 in a folder of my azure blob storage container. I want to use auto loader to read files beginning only on 1st, Oct, 2023 and ignoring all the files before this date to build a pipeline to read multiple files that are ingested daily.&lt;BR /&gt;Is this possible using Auto loader, if yes, how?&lt;BR /&gt;0 responses . Be the first to respond&lt;/P&gt;</description>
      <pubDate>Thu, 29 Feb 2024 02:08:01 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/auto-loader/m-p/62285#M31941</guid>
      <dc:creator>BhaveshPatel</dc:creator>
      <dc:date>2024-02-29T02:08:01Z</dc:date>
    </item>
    <item>
      <title>Re: Auto loader</title>
      <link>https://community.databricks.com/t5/data-engineering/auto-loader/m-p/62560#M31999</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/100565"&gt;@BhaveshPatel&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Three things that you can do:&lt;/P&gt;&lt;P&gt;- Move the files to the separate folder,&lt;BR /&gt;- Use a filter on metadata fields to filter out the unnecessary files,&lt;BR /&gt;- Use a&amp;nbsp;&lt;A href="https://spark.apache.org/docs/latest/sql-data-sources-generic-options.html#path-glob-filter" target="_self"&gt;pathGlobFilter&lt;/A&gt;&amp;nbsp;to filter only on the files you need&lt;/P&gt;</description>
      <pubDate>Mon, 04 Mar 2024 08:59:11 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/auto-loader/m-p/62560#M31999</guid>
      <dc:creator>daniel_sahal</dc:creator>
      <dc:date>2024-03-04T08:59:11Z</dc:date>
    </item>
  </channel>
</rss>

