<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: autoloader running task batch in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/autoloader-running-task-batch/m-p/123881#M47079</link>
    <description>&lt;P&gt;when i use&amp;nbsp;&lt;SPAN&gt;&amp;nbsp;.awaitTermination() it loop so long&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;</description>
    <pubDate>Thu, 03 Jul 2025 13:08:29 GMT</pubDate>
    <dc:creator>seefoods</dc:creator>
    <dc:date>2025-07-03T13:08:29Z</dc:date>
    <item>
      <title>autoloader running task batch</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-running-task-batch/m-p/123620#M47042</link>
      <description>&lt;P&gt;Hello Guys,&amp;nbsp;&lt;BR /&gt;I run task in batch mode with autoloader i enable option trigger (available now true). So, when my script finish, the continue running. Someone know who's happen?&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;Cordially ;&lt;/P&gt;</description>
      <pubDate>Wed, 02 Jul 2025 09:51:25 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-running-task-batch/m-p/123620#M47042</guid>
      <dc:creator>seefoods</dc:creator>
      <dc:date>2025-07-02T09:51:25Z</dc:date>
    </item>
    <item>
      <title>Re: autoloader running task batch</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-running-task-batch/m-p/123878#M47077</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/65591"&gt;@seefoods&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;We usually use Trigger.AvailableNow when files arrive in batches rather than continuously. If your script keeps running even after processing, it could be that the job is still checking for any remaining files.&lt;/P&gt;&lt;P&gt;If it seems to be hanging for too long, try adding .awaitTermination()&lt;/P&gt;</description>
      <pubDate>Thu, 03 Jul 2025 12:56:26 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-running-task-batch/m-p/123878#M47077</guid>
      <dc:creator>SP_6721</dc:creator>
      <dc:date>2025-07-03T12:56:26Z</dc:date>
    </item>
    <item>
      <title>Re: autoloader running task batch</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-running-task-batch/m-p/123881#M47079</link>
      <description>&lt;P&gt;when i use&amp;nbsp;&lt;SPAN&gt;&amp;nbsp;.awaitTermination() it loop so long&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 03 Jul 2025 13:08:29 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-running-task-batch/m-p/123881#M47079</guid>
      <dc:creator>seefoods</dc:creator>
      <dc:date>2025-07-03T13:08:29Z</dc:date>
    </item>
    <item>
      <title>Re: autoloader running task batch</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-running-task-batch/m-p/123883#M47080</link>
      <description>&lt;P&gt;this is my script I enable this options when i read files on Volumes before write on delta table&lt;/P&gt;&lt;DIV&gt;&lt;PRE&gt;(reader_stream.option(&lt;SPAN&gt;"cloudFiles.format"&lt;/SPAN&gt;, &lt;SPAN&gt;self&lt;/SPAN&gt;.file_format)&lt;BR /&gt;                     .option(&lt;SPAN&gt;"cloudFiles.schemaLocation"&lt;/SPAN&gt;, &lt;SPAN&gt;self&lt;/SPAN&gt;.schema_location)&lt;BR /&gt;                     .option(&lt;SPAN&gt;"cloudFiles.useNotifications"&lt;/SPAN&gt;, True)&lt;BR /&gt;                     .option(&lt;SPAN&gt;"cloudFiles.validateOptions"&lt;/SPAN&gt;, True)&lt;BR /&gt;                     .option(&lt;SPAN&gt;"cloudFiles.schemaEvolutionMode"&lt;/SPAN&gt;, "rescue")&lt;BR /&gt;                     .option(&lt;SPAN&gt;"cloudFiles.maxFilesPerTrigger"&lt;/SPAN&gt;, 1000))&lt;/PRE&gt;&lt;/DIV&gt;&lt;P&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;PRE&gt;&lt;SPAN&gt;f &lt;/SPAN&gt;&lt;SPAN&gt;self&lt;/SPAN&gt;.autoloader_config.use_autoloader:&lt;BR /&gt;    logger_file_ingestion.info(&lt;SPAN&gt;"debut d'ecriture en mode streaming"&lt;/SPAN&gt;)&lt;BR /&gt;&lt;BR /&gt;    &lt;SPAN&gt;if &lt;/SPAN&gt;&lt;SPAN&gt;self&lt;/SPAN&gt;.write_mode.value.lower() == &lt;SPAN&gt;"append"&lt;/SPAN&gt;:&lt;BR /&gt;        logger_file_ingestion.info(&lt;SPAN&gt;"ecriture en mode %s"&lt;/SPAN&gt;, &lt;SPAN&gt;self&lt;/SPAN&gt;.write_mode.value)&lt;BR /&gt;&lt;BR /&gt;        &lt;SPAN&gt;# Création de la configuration de base du stream&lt;BR /&gt;&lt;/SPAN&gt;        stream_writer = (df.writeStream&lt;BR /&gt;                         .format(&lt;SPAN&gt;"delta"&lt;/SPAN&gt;)&lt;BR /&gt;                         .outputMode(&lt;SPAN&gt;"append"&lt;/SPAN&gt;)&lt;BR /&gt;                         .option(&lt;SPAN&gt;"checkpointLocation"&lt;/SPAN&gt;, &lt;SPAN&gt;self&lt;/SPAN&gt;.checkpoint_location)&lt;BR /&gt;                         .option(&lt;SPAN&gt;"mergeSchema"&lt;/SPAN&gt;, &lt;SPAN&gt;"true"&lt;/SPAN&gt;)&lt;BR /&gt; 
                        .trigger(&lt;SPAN&gt;availableNow&lt;/SPAN&gt;=&lt;SPAN&gt;True&lt;/SPAN&gt;))&lt;BR /&gt;&lt;BR /&gt;        &lt;SPAN&gt;# Ajout des partitions si nécessaire&lt;BR /&gt;&lt;/SPAN&gt;        &lt;SPAN&gt;if &lt;/SPAN&gt;(&lt;SPAN&gt;self&lt;/SPAN&gt;.source_name.lower() == &lt;SPAN&gt;"name"&lt;/SPAN&gt;) &lt;SPAN&gt;and &lt;/SPAN&gt;(&lt;SPAN&gt;self&lt;/SPAN&gt;.file_format.lower() == &lt;SPAN&gt;"parquet"&lt;/SPAN&gt;):&lt;BR /&gt;            stream_writer = stream_writer.partitionBy(&lt;SPAN&gt;"year"&lt;/SPAN&gt;, &lt;SPAN&gt;"day"&lt;/SPAN&gt;, &lt;SPAN&gt;"month"&lt;/SPAN&gt;)&lt;BR /&gt;        &lt;SPAN&gt;elif &lt;/SPAN&gt;(&lt;SPAN&gt;self&lt;/SPAN&gt;.source_name.lower() == &lt;SPAN&gt;"test"&lt;/SPAN&gt;) &lt;SPAN&gt;and &lt;/SPAN&gt;(&lt;SPAN&gt;self&lt;/SPAN&gt;.file_format.lower() == &lt;SPAN&gt;"parquet"&lt;/SPAN&gt;):&lt;BR /&gt;            stream_writer = stream_writer.partitionBy(&lt;SPAN&gt;"day"&lt;/SPAN&gt;, &lt;SPAN&gt;"month"&lt;/SPAN&gt;, &lt;SPAN&gt;"year&lt;/SPAN&gt;&lt;SPAN&gt;"&lt;/SPAN&gt;)&lt;BR /&gt;&lt;BR /&gt;        &lt;SPAN&gt;# Lancement du stream et capture de la référence&lt;BR /&gt;&lt;/SPAN&gt;        stream_writer.toTable("bronze")&lt;BR /&gt;       &amp;nbsp;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;
      <pubDate>Thu, 03 Jul 2025 13:12:36 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-running-task-batch/m-p/123883#M47080</guid>
      <dc:creator>seefoods</dc:creator>
      <dc:date>2025-07-03T13:12:36Z</dc:date>
    </item>
  </channel>
</rss>

