<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Autoloader with filenotification in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/autoloader-with-filenotification/m-p/52746#M29562</link>
    <description>&lt;P&gt;Can you set this value to a higher number and try again?&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;cloudFiles.fetchParallelism: it is 1 by default&lt;/SPAN&gt;&lt;/P&gt;</description>
    <pubDate>Fri, 17 Nov 2023 20:14:09 GMT</pubDate>
    <dc:creator>Rdipak</dc:creator>
    <dc:date>2023-11-17T20:14:09Z</dc:date>
    <item>
      <title>Autoloader with filenotification</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-with-filenotification/m-p/52722#M29558</link>
      <description>&lt;P&gt;I am using DLT with filenotification and the DLT job is just fetching 1 notification from the SQS queue at a time. My pipeline is expected to process 500K notifications per day but it is running hours behind. Any recommendations?&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;spark.readStream.&lt;/SPAN&gt;&lt;SPAN&gt;format&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"cloudFiles"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.&lt;/SPAN&gt;&lt;SPAN&gt;option&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"cloudFiles.schemaLocation"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;SPAN&gt;"/mnt/abc/"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.&lt;/SPAN&gt;&lt;SPAN&gt;option&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;'cloudFiles.format'&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;'json'&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.&lt;/SPAN&gt;&lt;SPAN&gt;option&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;'cloudFiles.inferColumnTypes'&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;'true'&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.&lt;/SPAN&gt;&lt;SPAN&gt;option&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;'cloudFiles.useNotifications'&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.&lt;/SPAN&gt;&lt;SPAN&gt;option&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;'skipChangeCommits'&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;'true'&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.&lt;/SPAN&gt;&lt;SPAN&gt;option&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;'cloudFiles.backfillInterval'&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;'3 hour'&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;.&lt;/SPAN&gt;&lt;SPAN&gt;option&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;'cloudFiles.maxFilesPerTrigger'&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;10000&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;Logs:&lt;BR /&gt;NotificationFileEventFetcher: [queryId =] Fetched 1 messages from cloud queue storage.&lt;BR /&gt;NotificationFileEventFetcher: [queryId =] Fetched 1 messages from cloud queue storage.&lt;BR /&gt;NotificationFileEventFetcher: [queryId =] Fetched 1 messages from cloud queue storage.&lt;BR /&gt;&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Fri, 17 Nov 2023 17:46:47 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-with-filenotification/m-p/52722#M29558</guid>
      <dc:creator>kulkpd</dc:creator>
      <dc:date>2023-11-17T17:46:47Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader with filenotification</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-with-filenotification/m-p/52746#M29562</link>
      <description>&lt;P&gt;Can you set this value to a higher number and try again?&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;cloudFiles.fetchParallelism: it is 1 by default&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 17 Nov 2023 20:14:09 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-with-filenotification/m-p/52746#M29562</guid>
      <dc:creator>Rdipak</dc:creator>
      <dc:date>2023-11-17T20:14:09Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader with filenotification</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-with-filenotification/m-p/52806#M29604</link>
      <description>&lt;P&gt;Thanks! Setting &lt;SPAN&gt;cloudFiles.fetchParallelism to&lt;/SPAN&gt;&amp;nbsp;100 definitely helped to read more messages from SQS.&lt;BR /&gt;&lt;BR /&gt;NotificationFileEventFetcher: [queryId = 111] Fetched 100 messages from cloud queue storage&lt;/P&gt;</description>
      <pubDate>Fri, 17 Nov 2023 22:48:16 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-with-filenotification/m-p/52806#M29604</guid>
      <dc:creator>kulkpd</dc:creator>
      <dc:date>2023-11-17T22:48:16Z</dc:date>
    </item>
  </channel>
</rss>

