<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Approach to monthly data snapshots in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/approach-to-monthly-data-snapshots/m-p/79819#M35857</link>
    <description>&lt;P&gt;Hello&lt;BR /&gt;I'm building a data warehouse with all the usual facts and dimensions&lt;BR /&gt;It will flush (truncate) and rebuild on a monthly basis&lt;BR /&gt;Users need not only to view the data now but also to view it historically, i.e. what it was at a point in time&lt;BR /&gt;My initial thought was to use time travel functionality but, am I right in saying that that's the wrong approach here, and we should really archive or apply a date filter to the data?&lt;/P&gt;&lt;P&gt;So, an alternate archiving strategy would be better?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Mon, 22 Jul 2024 08:08:45 GMT</pubDate>
    <dc:creator>dpc</dc:creator>
    <dc:date>2024-07-22T08:08:45Z</dc:date>
    <item>
      <title>Approach to monthly data snapshots</title>
      <link>https://community.databricks.com/t5/data-engineering/approach-to-monthly-data-snapshots/m-p/79819#M35857</link>
      <description>&lt;P&gt;Hello&lt;BR /&gt;I'm building a data warehouse with all the usual facts and dimensions&lt;BR /&gt;It will flush (truncate) and rebuild on a monthly basis&lt;BR /&gt;Users need not only to view the data now but also to view it historically, i.e. what it was at a point in time&lt;BR /&gt;My initial thought was to use time travel functionality but, am I right in saying that that's the wrong approach here, and we should really archive or apply a date filter to the data?&lt;/P&gt;&lt;P&gt;So, an alternate archiving strategy would be better?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 22 Jul 2024 08:08:45 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/approach-to-monthly-data-snapshots/m-p/79819#M35857</guid>
      <dc:creator>dpc</dc:creator>
      <dc:date>2024-07-22T08:08:45Z</dc:date>
    </item>
    <item>
      <title>Re: Approach to monthly data snapshots</title>
      <link>https://community.databricks.com/t5/data-engineering/approach-to-monthly-data-snapshots/m-p/79820#M35858</link>
      <description>&lt;P&gt;This sounds like an &lt;A href="https://en.wikipedia.org/wiki/Slowly_changing_dimension#Type_2:_add_new_row" target="_self"&gt;SCD-2.&lt;/A&gt;&amp;nbsp;Pick the approach which fits best for your use case.&lt;/P&gt;&lt;P&gt;FWIW, time traveling isn't supposed to be used for business queries, only for technical ones (e.g. select yesterday's changes), because historical entries are not persistent and will be removed ("vacuum") after a defined period.&lt;/P&gt;</description>
      <pubDate>Mon, 22 Jul 2024 08:21:08 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/approach-to-monthly-data-snapshots/m-p/79820#M35858</guid>
      <dc:creator>Witold</dc:creator>
      <dc:date>2024-07-22T08:21:08Z</dc:date>
    </item>
    <item>
      <title>Re: Approach to monthly data snapshots</title>
      <link>https://community.databricks.com/t5/data-engineering/approach-to-monthly-data-snapshots/m-p/79821#M35859</link>
      <description>&lt;P&gt;Agree with Witold.&lt;SPAN&gt;&amp;nbsp;Best practice is to use time travel for recoveries from issues between 7 and 30 days (you can set a much higher period, but it's not recommended).&lt;BR /&gt;Beyond that, use snapshots for archival history.&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 22 Jul 2024 08:25:29 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/approach-to-monthly-data-snapshots/m-p/79821#M35859</guid>
      <dc:creator>szymon_dybczak</dc:creator>
      <dc:date>2024-07-22T08:25:29Z</dc:date>
    </item>
    <item>
      <title>Re: Approach to monthly data snapshots</title>
      <link>https://community.databricks.com/t5/data-engineering/approach-to-monthly-data-snapshots/m-p/80546#M36058</link>
      <description>&lt;P&gt;Great, thanks&lt;/P&gt;</description>
      <pubDate>Thu, 25 Jul 2024 12:16:51 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/approach-to-monthly-data-snapshots/m-p/80546#M36058</guid>
      <dc:creator>dpc</dc:creator>
      <dc:date>2024-07-25T12:16:51Z</dc:date>
    </item>
  </channel>
</rss>

