<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>Seeking Advice on Data Lakehouse Architecture with Databricks in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/seeking-advice-on-data-lakehouse-architecture-with-databricks/m-p/90863#M38010</link>
    <description>&lt;P&gt;I'm currently designing a data lakehouse architecture using Databricks and have a few questions. What are the best practices for efficiently ingesting both batch and streaming data into Delta Lake? Any recommended tools or approaches?&lt;/P&gt;</description>
    <pubDate>Wed, 18 Sep 2024 10:19:12 GMT</pubDate>
    <dc:creator>joshbuttler</dc:creator>
    <dc:date>2024-09-18T10:19:12Z</dc:date>
    <item>
      <title>Seeking Advice on Data Lakehouse Architecture with Databricks</title>
      <link>https://community.databricks.com/t5/data-engineering/seeking-advice-on-data-lakehouse-architecture-with-databricks/m-p/90863#M38010</link>
      <description>&lt;P&gt;I'm currently designing a data lakehouse architecture using Databricks and have a few questions. What are the best practices for efficiently ingesting both batch and streaming data into Delta Lake? Any recommended tools or approaches?&lt;/P&gt;</description>
      <pubDate>Wed, 18 Sep 2024 10:19:12 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/seeking-advice-on-data-lakehouse-architecture-with-databricks/m-p/90863#M38010</guid>
      <dc:creator>joshbuttler</dc:creator>
      <dc:date>2024-09-18T10:19:12Z</dc:date>
    </item>
    <item>
      <title>Re: Seeking Advice on Data Lakehouse Architecture with Databricks</title>
      <link>https://community.databricks.com/t5/data-engineering/seeking-advice-on-data-lakehouse-architecture-with-databricks/m-p/90867#M38012</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/121430"&gt;@joshbuttler&lt;/a&gt;,&lt;/P&gt;&lt;P&gt;I think the best way is to use auto loader, which&amp;nbsp;&lt;SPAN&gt;&amp;nbsp;provides a highly efficient way to incrementally process new data, while also guaranteeing each file is processed exactly once.&lt;BR /&gt;It supports ingestion in a batch mode (&lt;STRONG&gt;Trigger.AvailableNow()&lt;/STRONG&gt;) and you can also load data in streaming manner (under the hood it's using spark structured streaming). You have native support for variety of source files like JSON, PARQUET, CSV, XML to name a few&amp;nbsp; and also integration with streaming data sources like Kafka, Kinesis or EventHub.&lt;BR /&gt;&lt;BR /&gt;&lt;A href="https://learn.microsoft.com/en-us/azure/databricks/ingestion/cloud-object-storage/auto-loader/" target="_blank" rel="noopener"&gt;What is Auto Loader? - Azure Databricks | Microsoft Learn&lt;/A&gt;&lt;BR /&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;H2&gt;&amp;nbsp;&lt;/H2&gt;</description>
      <pubDate>Wed, 18 Sep 2024 11:02:15 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/seeking-advice-on-data-lakehouse-architecture-with-databricks/m-p/90867#M38012</guid>
      <dc:creator>szymon_dybczak</dc:creator>
      <dc:date>2024-09-18T11:02:15Z</dc:date>
    </item>
  </channel>
</rss>

