<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: History load from Source and in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/history-load-from-source-and/m-p/107557#M42832</link>
    <description>&lt;P&gt;I imported 16 TB of data using ADF. In this scenario, I'd create a process that extracts the data from the source using ADF and then executes the rest of the logic to populate the tables in the gold layer. For the new data, I'd create a separate process using Auto Loader.&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Wed, 29 Jan 2025 11:01:35 GMT</pubDate>
    <dc:creator>MariuszK</dc:creator>
    <dc:date>2025-01-29T11:01:35Z</dc:date>
    <item>
      <title>History load from Source and</title>
      <link>https://community.databricks.com/t5/data-engineering/history-load-from-source-and/m-p/107311#M42769</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;/P&gt;&lt;P&gt;As part of our requirement, we want to load a large volume of historical data from the source system into Databricks Bronze and then process it to Gold. We want to use batch read and write for the historical load, and then, for the delta (incremental) load, use readStream and writeStream on the same table with a checkpoint so that incremental tracking happens automatically. We want this approach because it was not possible to use streams for the historical load; once it is done, we want to switch to streams, since the delta load will run frequently, every 15 minutes. Are there any approaches for implementing this?&amp;nbsp;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 28 Jan 2025 00:43:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/history-load-from-source-and/m-p/107311#M42769</guid>
      <dc:creator>maddan80</dc:creator>
      <dc:date>2025-01-28T00:43:58Z</dc:date>
    </item>
    <item>
      <title>Re: History load from Source and</title>
      <link>https://community.databricks.com/t5/data-engineering/history-load-from-source-and/m-p/107332#M42774</link>
      <description>&lt;P&gt;What is the size of your historical load and are you loading your historical data from a delta table?&lt;/P&gt;</description>
      <pubDate>Tue, 28 Jan 2025 05:24:55 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/history-load-from-source-and/m-p/107332#M42774</guid>
      <dc:creator>Lakshay</dc:creator>
      <dc:date>2025-01-28T05:24:55Z</dc:date>
    </item>
    <item>
      <title>Re: History load from Source and</title>
      <link>https://community.databricks.com/t5/data-engineering/history-load-from-source-and/m-p/107453#M42804</link>
      <description>&lt;P&gt;Around 2.5 billion records, around 1 TB.&lt;/P&gt;</description>
      <pubDate>Tue, 28 Jan 2025 17:48:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/history-load-from-source-and/m-p/107453#M42804</guid>
      <dc:creator>maddan80</dc:creator>
      <dc:date>2025-01-28T17:48:05Z</dc:date>
    </item>
    <item>
      <title>Re: History load from Source and</title>
      <link>https://community.databricks.com/t5/data-engineering/history-load-from-source-and/m-p/107557#M42832</link>
      <description>&lt;P&gt;I imported 16 TB of data using ADF. In this scenario, I'd create a process that extracts the data from the source using ADF and then executes the rest of the logic to populate the tables in the gold layer. For the new data, I'd create a separate process using Auto Loader.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 29 Jan 2025 11:01:35 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/history-load-from-source-and/m-p/107557#M42832</guid>
      <dc:creator>MariuszK</dc:creator>
      <dc:date>2025-01-29T11:01:35Z</dc:date>
    </item>
  </channel>
</rss>

