<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Databricks now supports event-driven workloads, especially for loading cloud files from external locations. This means you can save costs and resource... in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/7749#M3525</link>
    <description>&lt;P&gt;Databricks now supports event-driven workloads, especially for loading cloud files from external locations. This means you can save costs and resources by triggering your Databricks jobs only when new files arrive in your cloud storage, instead of mounting it as DBFS and polling it periodically. To use this feature, you need to follow these steps:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;Add an external location for your ADLS Gen2 container,&lt;/LI&gt;&lt;LI&gt;Make sure the storage credential you use (such as an Access Connector, service principal, or managed identity) has the Storage Blob Data Contributor role on that container,&lt;/LI&gt;&lt;LI&gt;Make sure the account you use to run your workload has at least the READ FILES permission on the external location,&lt;/LI&gt;&lt;LI&gt;Write a notebook that loads cloud files from the external location,&lt;/LI&gt;&lt;LI&gt;Set a file arrival trigger for your workflow and specify the exact external location as the source.&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;With these steps, you can easily create and run event-driven workloads on Databricks.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="ezgif-3-946af786d0"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/540iC77C974D82A24DC2/image-size/large?v=v2&amp;amp;px=999" role="button" title="ezgif-3-946af786d0" alt="ezgif-3-946af786d0" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
    <pubDate>Tue, 14 Mar 2023 13:39:22 GMT</pubDate>
    <dc:creator>Hubert-Dudek</dc:creator>
    <dc:date>2023-03-14T13:39:22Z</dc:date>
    <item>
      <title>Databricks now supports event-driven workloads, especially for loading cloud files from external locations. This means you can save costs and resource...</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/7749#M3525</link>
      <description>&lt;P&gt;Databricks now supports event-driven workloads, especially for loading cloud files from external locations. This means you can save costs and resources by triggering your Databricks jobs only when new files arrive in your cloud storage, instead of mounting it as DBFS and polling it periodically. To use this feature, you need to follow these steps:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;Add an external location for your ADLS Gen2 container,&lt;/LI&gt;&lt;LI&gt;Make sure the storage credential you use (such as an Access Connector, service principal, or managed identity) has the Storage Blob Data Contributor role on that container,&lt;/LI&gt;&lt;LI&gt;Make sure the account you use to run your workload has at least the READ FILES permission on the external location,&lt;/LI&gt;&lt;LI&gt;Write a notebook that loads cloud files from the external location,&lt;/LI&gt;&lt;LI&gt;Set a file arrival trigger for your workflow and specify the exact external location as the source.&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;With these steps, you can easily create and run event-driven workloads on Databricks.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="ezgif-3-946af786d0"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/540iC77C974D82A24DC2/image-size/large?v=v2&amp;amp;px=999" role="button" title="ezgif-3-946af786d0" alt="ezgif-3-946af786d0" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 14 Mar 2023 13:39:22 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/7749#M3525</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2023-03-14T13:39:22Z</dc:date>
    </item>
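    <!--
      The post above ends its step list with "write a notebook that loads cloud files from the
      external location." A minimal sketch of such a notebook, assuming a hypothetical external
      location path abfss://landing@mystorage.dfs.core.windows.net/events and a target table
      main.bronze.events (both illustrative, not from the thread):

        # Auto Loader: incrementally pick up new files from the external location
        df = (
            spark.readStream.format("cloudFiles")
            .option("cloudFiles.format", "json")  # format of the arriving files (assumption)
            .option("cloudFiles.schemaLocation",
                    "abfss://landing@mystorage.dfs.core.windows.net/_schemas/events")
            .load("abfss://landing@mystorage.dfs.core.windows.net/events")
        )

        # The file arrival trigger starts the job; availableNow processes whatever is new
        # and then stops, so the cluster does not idle between arrivals.
        (
            df.writeStream
            .option("checkpointLocation",
                    "abfss://landing@mystorage.dfs.core.windows.net/_checkpoints/events")
            .trigger(availableNow=True)
            .toTable("main.bronze.events")
        )
    -->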
    <item>
      <title>Re: Databricks now supports event-driven workloads, especially for loading cloud files from external</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/64138#M32475</link>
      <description>&lt;P&gt;Hey,&lt;/P&gt;&lt;P&gt;We have a use case where Salesforce generates Change Data Capture (CDC) platform events. With this new event-driven workload support, can Databricks directly consume these CDC events from Salesforce?&lt;/P&gt;&lt;P&gt;We are currently also evaluating a middleware such as MuleSoft, as described in this reference article:&amp;nbsp;&lt;A class="" href="https://developer.mulesoft.com/tutorials-and-howtos/integrations/salesforce-connector/subscribe-to-cdc-events/" target="_blank" rel="noopener noreferrer"&gt;Subscribe to Change Data Capture Events with the Salesforce Connector&lt;/A&gt;. However, we are concerned about the pricing of MuleSoft.&lt;/P&gt;</description>
      <pubDate>Wed, 20 Mar 2024 03:04:35 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/64138#M32475</guid>
      <dc:creator>Salesforce</dc:creator>
      <dc:date>2024-03-20T03:04:35Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks now supports event-driven workloads, especially for loading cloud files from external</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/64150#M32479</link>
      <description>&lt;P&gt;I think we are talking about file events here.&lt;BR /&gt;What you are talking about is in fact streaming ingest from a CDC system.&amp;nbsp; That can be done, but not by connecting directly to the CDC source.&amp;nbsp; You can forward the CDC events to an event queue like Kafka and let Spark subscribe to one of those topics.&lt;BR /&gt;MuleSoft probably works too, but honestly, as you already mentioned, it is overpriced.&lt;BR /&gt;What is presented here was already possible in many other systems, but it has now also been added to Databricks.&lt;/P&gt;</description>
      <pubDate>Wed, 20 Mar 2024 08:03:12 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/64150#M32479</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2024-03-20T08:03:12Z</dc:date>
    </item>
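    <!--
      The reply above suggests forwarding the Salesforce CDC events to an event queue such as
      Kafka and letting Spark subscribe to the topic. A minimal Structured Streaming sketch,
      assuming a hypothetical broker kafka.internal:9092, topic salesforce.cdc, and target
      table main.bronze.salesforce_cdc_raw (all illustrative, not from the thread):

        from pyspark.sql.functions import col

        cdc = (
            spark.readStream.format("kafka")
            .option("kafka.bootstrap.servers", "kafka.internal:9092")
            .option("subscribe", "salesforce.cdc")
            .option("startingOffsets", "earliest")
            .load()
            # Kafka delivers key/value as binary; cast the payload before parsing it further
            .select(col("key").cast("string"), col("value").cast("string"), col("timestamp"))
        )

        (
            cdc.writeStream
            .option("checkpointLocation",
                    "abfss://landing@mystorage.dfs.core.windows.net/_checkpoints/salesforce_cdc")
            .toTable("main.bronze.salesforce_cdc_raw")
        )
    -->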
    <item>
      <title>Re: Databricks now supports event-driven workloads, especially for loading cloud files from external</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/64327#M32540</link>
      <description>&lt;P&gt;While this works great with new files, is it possible to trigger when an update happens to an existing file?&lt;/P&gt;</description>
      <pubDate>Thu, 21 Mar 2024 18:34:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/64327#M32540</guid>
      <dc:creator>Floody</dc:creator>
      <dc:date>2024-03-21T18:34:42Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks now supports event-driven workloads, especially for loading cloud files from external</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/64529#M32596</link>
      <description>&lt;P&gt;The trigger fires on file events in blob storage, where blobs are typically immutable, meaning files cannot be updated in place, only created, deleted, or overwritten.&lt;/P&gt;</description>
      <pubDate>Mon, 25 Mar 2024 15:16:50 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/64529#M32596</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2024-03-25T15:16:50Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks now supports event-driven workloads, especially for loading cloud files from external</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/64538#M32601</link>
      <description>&lt;P&gt;Yes, the file is getting overwritten, but the trigger is not firing. Maybe I am missing something?&lt;/P&gt;</description>
      <pubDate>Mon, 25 Mar 2024 18:29:28 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/64538#M32601</guid>
      <dc:creator>Floody</dc:creator>
      <dc:date>2024-03-25T18:29:28Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks now supports event-driven workloads, especially for loading cloud files from external</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/64648#M32613</link>
      <description>&lt;P&gt;Probably the event is not triggered by an overwrite. Can you test with a delete followed by a create?&lt;/P&gt;</description>
      <pubDate>Tue, 26 Mar 2024 13:06:13 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/64648#M32613</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2024-03-26T13:06:13Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks now supports event-driven workloads, especially for loading cloud files from external</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/67375#M33330</link>
      <description>&lt;P&gt;For reference, the trigger will not contain any information on the event itself (like file names, etc.), so you cannot build a dynamic event-driven architecture with this trigger.&lt;/P&gt;</description>
      <pubDate>Fri, 26 Apr 2024 10:39:24 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/67375#M33330</guid>
      <dc:creator>adriennn</dc:creator>
      <dc:date>2024-04-26T10:39:24Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks now supports event-driven workloads, especially for loading cloud files from external</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/67388#M33331</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/86227"&gt;@adriennn&lt;/a&gt;&amp;nbsp;&lt;BR /&gt;That's because it's only one of the trigger types. To load newly arrived files automatically, you can utilize Auto Loader.&lt;/P&gt;</description>
      <pubDate>Fri, 26 Apr 2024 11:03:39 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/67388#M33331</guid>
      <dc:creator>daniel_sahal</dc:creator>
      <dc:date>2024-04-26T11:03:39Z</dc:date>
    </item>
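    <!--
      Following up on the reply above: because the file arrival trigger itself carries no file
      names, the usual pattern is to let Auto Loader discover the new files and, if the path is
      needed downstream, capture it as a column inside the stream. A hedged sketch; the
      _metadata.file_path column is available on recent Databricks runtimes, and the path is
      illustrative:

        from pyspark.sql.functions import col

        df = (
            spark.readStream.format("cloudFiles")
            .option("cloudFiles.format", "json")
            .load("abfss://landing@mystorage.dfs.core.windows.net/events")
            # Record which file each row came from, since the trigger does not expose it
            .withColumn("source_file", col("_metadata.file_path"))
        )
    -->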
    <item>
      <title>Re: Databricks now supports event-driven workloads, especially for loading cloud files from external</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/67394#M33333</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/79106"&gt;@daniel_sahal&lt;/a&gt;&amp;nbsp;I get your point, but if for a scheduled trigger you can get all kinds of attributes about the trigger time (arguably, this is available for all the triggers), then why wouldn't the most important attribute of a file event be available through the trigger?&lt;BR /&gt;&lt;BR /&gt;What I'm thinking is something like:&lt;BR /&gt;job.trigger.file_arrival.file_path,&amp;nbsp;job.trigger.file_arrival.parent_folder,&amp;nbsp;etc.&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="adriennn_0-1714136125829.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/7270i6F12333951F0EF5D/image-size/medium/is-moderation-mode/true?v=v2&amp;amp;px=400" role="button" title="adriennn_0-1714136125829.png" alt="adriennn_0-1714136125829.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 26 Apr 2024 12:59:19 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-now-supports-event-driven-workloads-especially-for/m-p/67394#M33333</guid>
      <dc:creator>adriennn</dc:creator>
      <dc:date>2024-04-26T12:59:19Z</dc:date>
    </item>
  </channel>
</rss>

