<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Autoloader  cluster in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/autoloader-cluster/m-p/22811#M15691</link>
    <description>&lt;P&gt;@Venkata Ramakrishna Alvakonda​&amp;nbsp;, No, it is not required. The last position is stored in the checkpoint file. New files are detected by directory listings or are stored in a queue. &lt;/P&gt;</description>
    <pubDate>Thu, 14 Apr 2022 22:43:47 GMT</pubDate>
    <dc:creator>Hubert-Dudek</dc:creator>
    <dc:date>2022-04-14T22:43:47Z</dc:date>
    <item>
      <title>Autoloader  cluster</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-cluster/m-p/22810#M15690</link>
      <description>&lt;P&gt;I wanted to setup Autoloader to process files from Azure Data Lake (Blob) automatically whenever new files arrive.  For this to work, I wanted to know if AutoLoader requires that the cluster is on all the time. &lt;/P&gt;</description>
      <pubDate>Thu, 14 Apr 2022 16:47:36 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-cluster/m-p/22810#M15690</guid>
      <dc:creator>RK_AV</dc:creator>
      <dc:date>2022-04-14T16:47:36Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader  cluster</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-cluster/m-p/22811#M15691</link>
      <description>&lt;P&gt;@Venkata Ramakrishna Alvakonda​&amp;nbsp;, No, it is not required. The last position is stored in the checkpoint file. New files are detected by directory listings or are stored in a queue. &lt;/P&gt;</description>
      <pubDate>Thu, 14 Apr 2022 22:43:47 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-cluster/m-p/22811#M15691</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2022-04-14T22:43:47Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader  cluster</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-cluster/m-p/22812#M15692</link>
      <description>&lt;P&gt;@Hubert Dudek​&amp;nbsp;, Thank you for your response. My question was: Does the cluster have to be on all the time to take advantage of Auto Loader? What happens if a file arrives in the blob storage while the cluster is down? Does it automatically start the cluster and then invoke the Auto Loader process to read the file? Or is the file picked up the next time the cluster starts? &lt;/P&gt;</description>
      <pubDate>Fri, 15 Apr 2022 02:02:37 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-cluster/m-p/22812#M15692</guid>
      <dc:creator>RK_AV</dc:creator>
      <dc:date>2022-04-15T02:02:37Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader  cluster</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-cluster/m-p/22814#M15694</link>
      <description>&lt;P&gt;Thanks @Kaniz Fatma​&amp;nbsp; for the response. Unfortunately, I don't have a set frequency for the arrival of files; it is very ad hoc. Let me ask you this question: is it possible for Event Grid to trigger a Databricks job? &lt;/P&gt;</description>
      <pubDate>Tue, 19 Apr 2022 18:59:27 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-cluster/m-p/22814#M15694</guid>
      <dc:creator>RK_AV</dc:creator>
      <dc:date>2022-04-19T18:59:27Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader  cluster</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-cluster/m-p/22815#M15695</link>
      <description>&lt;P&gt;You can certainly try Logic Apps to trigger a notebook run when something arrives in Event Grid.&lt;/P&gt;</description>
      <pubDate>Tue, 19 Apr 2022 19:31:00 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-cluster/m-p/22815#M15695</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2022-04-19T19:31:00Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader  cluster</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-cluster/m-p/22816#M15696</link>
      <description>&lt;P&gt;@Kaniz Fatma​&amp;nbsp;, If my cluster is not active and I have uploaded 50 files to the storage location, where will Auto Loader list out these 50 files? Will it use a checkpoint location? If yes, how can I set the checkpoint location in cloud storage for identifying these new files? Can you please tell me the backend process that is used to identify these new files if my cluster is not active?&lt;/P&gt;</description>
      <pubDate>Sat, 05 Nov 2022 12:40:17 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-cluster/m-p/22816#M15696</guid>
      <dc:creator>asif5494</dc:creator>
      <dc:date>2022-11-05T12:40:17Z</dc:date>
    </item>
  </channel>
</rss>

