<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Near real time processing with CDC from snowflake to databricks in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/near-real-time-processing-with-cdc-from-snowflake-to-databricks/m-p/106508#M42502</link>
    <description>&lt;P&gt;it is like latency vs complexity and cost. you have to choose for yourself &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt; for me option A sounds reasonable&lt;/P&gt;</description>
    <pubDate>Tue, 21 Jan 2025 14:36:40 GMT</pubDate>
    <dc:creator>saurabh18cs</dc:creator>
    <dc:date>2025-01-21T14:36:40Z</dc:date>
    <item>
      <title>Near real time processing with CDC from snowflake to databricks</title>
      <link>https://community.databricks.com/t5/data-engineering/near-real-time-processing-with-cdc-from-snowflake-to-databricks/m-p/106507#M42501</link>
      <description>&lt;DIV class=""&gt;&lt;SPAN&gt;Hi&lt;/SPAN&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;I would like to configure near real time streaming on Databricks to process data as soon as a new data finish processing on snowflake e.g. with DLT pipelins and Auto Loader. Which option would be better for this setup?&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV class=""&gt;Option A)&lt;/DIV&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;Export the Snowpark DataFrame to Databricks to an external cloud storage (e.g. S3 as parquet).&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV class=""&gt;Option B)&lt;/DIV&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;&amp;nbsp;use&amp;nbsp;apache iceberg&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;with polaris and configure from Databricks in order to read that information.&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Tue, 21 Jan 2025 14:45:31 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/near-real-time-processing-with-cdc-from-snowflake-to-databricks/m-p/106507#M42501</guid>
      <dc:creator>abelian-grape</dc:creator>
      <dc:date>2025-01-21T14:45:31Z</dc:date>
    </item>
    <item>
      <title>Re: Near real time processing with CDC from snowflake to databricks</title>
      <link>https://community.databricks.com/t5/data-engineering/near-real-time-processing-with-cdc-from-snowflake-to-databricks/m-p/106508#M42502</link>
      <description>&lt;P&gt;it is like latency vs complexity and cost. you have to choose for yourself &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt; for me option A sounds reasonable&lt;/P&gt;</description>
      <pubDate>Tue, 21 Jan 2025 14:36:40 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/near-real-time-processing-with-cdc-from-snowflake-to-databricks/m-p/106508#M42502</guid>
      <dc:creator>saurabh18cs</dc:creator>
      <dc:date>2025-01-21T14:36:40Z</dc:date>
    </item>
  </channel>
</rss>

