<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Delta Live Tables use case in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/delta-live-tables-use-case/m-p/101417#M40652</link>
    <description>&lt;H2 class="mb-2 mt-6 text-lg first:mt-3"&gt;Using DLT for Your Use Case&lt;/H2&gt;
&lt;P&gt;&lt;SPAN&gt;DLT can be a good fit for your scenario, especially when implementing Slowly Changing Dimension (SCD) Type 2. Here's how you can approach this:&lt;/SPAN&gt;&lt;/P&gt;
&lt;OL class="marker:text-textOff list-decimal pl-8"&gt;
&lt;LI&gt;&lt;SPAN&gt;&lt;STRONG&gt;Ingestion with Auto Loader:&lt;/STRONG&gt;&amp;nbsp;Use Auto Loader to ingest the daily parquet files into your bronze layer. This handles the full overwrites efficiently.&lt;/SPAN&gt;&lt;/LI&gt;
&lt;LI&gt;&lt;SPAN&gt;&lt;STRONG&gt;Bronze Layer Processing:&lt;/STRONG&gt;&amp;nbsp;Create a bronze table using DLT that reads from the landing area.&lt;/SPAN&gt;&lt;/LI&gt;
&lt;LI&gt;&lt;SPAN&gt;&lt;STRONG&gt;SCD Type 2 Implementation:&lt;/STRONG&gt;&amp;nbsp;Implement SCD Type 2 in the silver layer using DLT's&amp;nbsp;&lt;CODE&gt;APPLY CHANGES&lt;/CODE&gt;&amp;nbsp;syntax.&lt;/SPAN&gt;&lt;/LI&gt;
&lt;/OL&gt;
&lt;H2 class="mb-2 mt-6 text-lg first:mt-3"&gt;Implementation Approach&lt;/H2&gt;
&lt;P&gt;&lt;SPAN&gt;Here's a high-level implementation strategy:&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;&lt;STRONG&gt;Bronze Layer:&lt;/STRONG&gt;&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;@dlt.table&lt;BR /&gt;def bronze_table():&lt;BR /&gt;return (&lt;BR /&gt;spark.readStream.format("cloudFiles")&lt;BR /&gt;.option("cloudFiles.format", "parquet")&lt;BR /&gt;.load("/path/to/landing/area")&lt;BR /&gt;)&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;Silver Layer with SCD Type 2:&lt;/STRONG&gt;&lt;BR /&gt;&lt;BR /&gt;dlt.create_streaming_table("silver_table_scd2")&lt;/P&gt;
&lt;P&gt;dlt.apply_changes(&lt;BR /&gt;target = "silver_table_scd2",&lt;BR /&gt;source = "bronze_table",&lt;BR /&gt;keys = ["your_primary_key"],&lt;BR /&gt;sequence_by = col("file_modification_time"),&lt;BR /&gt;stored_as_scd_type = "2"&lt;BR /&gt;)&lt;/P&gt;</description>
    <pubDate>Mon, 09 Dec 2024 07:33:23 GMT</pubDate>
    <dc:creator>Sidhant07</dc:creator>
    <dc:date>2024-12-09T07:33:23Z</dc:date>
    <item>
      <title>Delta Live Tables use case</title>
      <link>https://community.databricks.com/t5/data-engineering/delta-live-tables-use-case/m-p/63994#M32427</link>
      <description>&lt;P&gt;Hi all,&lt;/P&gt;&lt;P&gt;We have the following use case and are wondering if DLT is the correct approach.&lt;/P&gt;&lt;P&gt;Landing area with daily dumps of parquet files into our Data Lake container.&lt;/P&gt;&lt;P&gt;The daily dump does a full overwrite of the parquet each time, keeping the same file name.&lt;/P&gt;&lt;P&gt;The idea would be to re-process the whole parquet each time and manage the delta in the bronze table with SCD 2.&lt;/P&gt;&lt;P&gt;Suggestions on the best approach would be helpful.&lt;/P&gt;&lt;P&gt;Cheers&lt;/P&gt;</description>
      <pubDate>Mon, 18 Mar 2024 13:31:34 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/delta-live-tables-use-case/m-p/63994#M32427</guid>
      <dc:creator>Floody</dc:creator>
      <dc:date>2024-03-18T13:31:34Z</dc:date>
    </item>
    <item>
      <title>Re: Delta Live Tables use case</title>
      <link>https://community.databricks.com/t5/data-engineering/delta-live-tables-use-case/m-p/101417#M40652</link>
      <description>&lt;H2 class="mb-2 mt-6 text-lg first:mt-3"&gt;Using DLT for Your Use Case&lt;/H2&gt;
&lt;P&gt;&lt;SPAN&gt;DLT can be a good fit for your scenario, especially when implementing Slowly Changing Dimension (SCD) Type 2. Here's how you can approach this:&lt;/SPAN&gt;&lt;/P&gt;
&lt;OL class="marker:text-textOff list-decimal pl-8"&gt;
&lt;LI&gt;&lt;SPAN&gt;&lt;STRONG&gt;Ingestion with Auto Loader:&lt;/STRONG&gt;&amp;nbsp;Use Auto Loader to ingest the daily parquet files into your bronze layer. This handles the full overwrites efficiently.&lt;/SPAN&gt;&lt;/LI&gt;
&lt;LI&gt;&lt;SPAN&gt;&lt;STRONG&gt;Bronze Layer Processing:&lt;/STRONG&gt;&amp;nbsp;Create a bronze table using DLT that reads from the landing area.&lt;/SPAN&gt;&lt;/LI&gt;
&lt;LI&gt;&lt;SPAN&gt;&lt;STRONG&gt;SCD Type 2 Implementation:&lt;/STRONG&gt;&amp;nbsp;Implement SCD Type 2 in the silver layer using DLT's&amp;nbsp;&lt;CODE&gt;APPLY CHANGES&lt;/CODE&gt;&amp;nbsp;syntax.&lt;/SPAN&gt;&lt;/LI&gt;
&lt;/OL&gt;
&lt;H2 class="mb-2 mt-6 text-lg first:mt-3"&gt;Implementation Approach&lt;/H2&gt;
&lt;P&gt;&lt;SPAN&gt;Here's a high-level implementation strategy:&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;&lt;STRONG&gt;Bronze Layer:&lt;/STRONG&gt;&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;@dlt.table&lt;BR /&gt;def bronze_table():&lt;BR /&gt;return (&lt;BR /&gt;spark.readStream.format("cloudFiles")&lt;BR /&gt;.option("cloudFiles.format", "parquet")&lt;BR /&gt;.load("/path/to/landing/area")&lt;BR /&gt;)&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;Silver Layer with SCD Type 2:&lt;/STRONG&gt;&lt;BR /&gt;&lt;BR /&gt;dlt.create_streaming_table("silver_table_scd2")&lt;/P&gt;
&lt;P&gt;dlt.apply_changes(&lt;BR /&gt;target = "silver_table_scd2",&lt;BR /&gt;source = "bronze_table",&lt;BR /&gt;keys = ["your_primary_key"],&lt;BR /&gt;sequence_by = col("file_modification_time"),&lt;BR /&gt;stored_as_scd_type = "2"&lt;BR /&gt;)&lt;/P&gt;</description>
      <pubDate>Mon, 09 Dec 2024 07:33:23 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/delta-live-tables-use-case/m-p/101417#M40652</guid>
      <dc:creator>Sidhant07</dc:creator>
      <dc:date>2024-12-09T07:33:23Z</dc:date>
    </item>
  </channel>
</rss>

