<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: ETL pipeline in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/etl-pipeline/m-p/115839#M45199</link>
    <description>&lt;P&gt;Databricks doesn't support duration-based schedules directly, but you can simulate this using cron syntax.&lt;BR /&gt;Use this cron expression:&amp;nbsp;0 10-19 * * *&lt;BR /&gt;To ensure compute is not running outside of these hours, set Auto-Termination to a low value like 15 mins.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Fri, 18 Apr 2025 11:03:09 GMT</pubDate>
    <dc:creator>tltharani</dc:creator>
    <dc:date>2025-04-18T11:03:09Z</dc:date>
    <item>
      <title>ETL pipeline</title>
      <link>https://community.databricks.com/t5/data-engineering/etl-pipeline/m-p/115837#M45197</link>
      <description>&lt;P&gt;I have an ETL pipeline in workflows which I am using to create a materialized view. I want to schedule the pipeline for 10 hours only, starting from 10 am. How can I schedule that? I can only see an hourly schedule or cron syntax. I want the compute to be up for 10 hours and then terminate.&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;P&gt;Yogesh&lt;/P&gt;</description>
      <pubDate>Fri, 18 Apr 2025 10:43:07 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/etl-pipeline/m-p/115837#M45197</guid>
      <dc:creator>Yunky007</dc:creator>
      <dc:date>2025-04-18T10:43:07Z</dc:date>
    </item>
    <item>
      <title>Re: ETL pipeline</title>
      <link>https://community.databricks.com/t5/data-engineering/etl-pipeline/m-p/115839#M45199</link>
      <description>&lt;P&gt;Databricks doesn't support duration-based schedules directly, but you can simulate this using cron syntax.&lt;BR /&gt;Use this cron expression:&amp;nbsp;0 10-19 * * *&lt;BR /&gt;To ensure compute is not running outside of these hours, set Auto-Termination to a low value like 15 mins.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 18 Apr 2025 11:03:09 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/etl-pipeline/m-p/115839#M45199</guid>
      <dc:creator>tltharani</dc:creator>
      <dc:date>2025-04-18T11:03:09Z</dc:date>
    </item>
    <item>
      <title>Re: ETL pipeline</title>
      <link>https://community.databricks.com/t5/data-engineering/etl-pipeline/m-p/115844#M45200</link>
      <description>&lt;P&gt;Hey&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/160036"&gt;@Yunky007&lt;/a&gt;&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;You should use the cron expression &lt;SPAN class=""&gt;0 10 * * *&lt;/SPAN&gt; to start the process at 10 AM.&lt;BR /&gt;Then, inside your script, implement a loop or mechanism that keeps the logic running for 10 hours, that’s the trick.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;import time
from datetime import datetime, timedelta

start_time = datetime.now()
end_time = start_time + timedelta(hours=10)

while datetime.now() &amp;lt; end_time:
    # Logic
    spark.sql("REFRESH MATERIALIZED VIEW my_catalog.my_schema.my_mv")

    # Wait time between executions
    time.sleep(60 * 60)  # 3600 secs = 1 h&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;Hope this helps &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;BR /&gt;&lt;BR /&gt;Isi&lt;/P&gt;</description>
      <pubDate>Fri, 18 Apr 2025 12:37:14 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/etl-pipeline/m-p/115844#M45200</guid>
      <dc:creator>Isi</dc:creator>
      <dc:date>2025-04-18T12:37:14Z</dc:date>
    </item>
    <item>
      <title>Re: ETL pipeline</title>
      <link>https://community.databricks.com/t5/data-engineering/etl-pipeline/m-p/117685#M45546</link>
      <description>&lt;P&gt;Use cron syntax with a stop condition after 10 hours of runtime.&lt;/P&gt;</description>
      <pubDate>Mon, 05 May 2025 11:12:10 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/etl-pipeline/m-p/117685#M45546</guid>
      <dc:creator>KaelaniBraster</dc:creator>
      <dc:date>2025-05-05T11:12:10Z</dc:date>
    </item>
  </channel>
</rss>

