<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How is Idempotency ensured for COPY INTO command in Machine Learning</title>
    <link>https://community.databricks.com/t5/machine-learning/how-is-idempotency-ensured-for-copy-into-command/m-p/54442#M2777</link>
    <description>&lt;P&gt;How does COPY INTO work with table restore?&lt;/P&gt;&lt;P&gt;I ran some tests, and the restore operation does NOT restore the key-value store entries of the target table to the specified version, which means that the data that came after the chosen version cannot be inserted (unless forced).&lt;/P&gt;&lt;P&gt;Is this behavior intended?&lt;/P&gt;</description>
    <pubDate>Fri, 01 Dec 2023 15:27:10 GMT</pubDate>
    <dc:creator>N_M</dc:creator>
    <dc:date>2023-12-01T15:27:10Z</dc:date>
    <item>
      <title>How is Idempotency ensured for COPY INTO command</title>
      <link>https://community.databricks.com/t5/machine-learning/how-is-idempotency-ensured-for-copy-into-command/m-p/19795#M1083</link>
      <description />
      <pubDate>Fri, 25 Jun 2021 14:01:28 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/how-is-idempotency-ensured-for-copy-into-command/m-p/19795#M1083</guid>
      <dc:creator>brickster_2018</dc:creator>
      <dc:date>2021-06-25T14:01:28Z</dc:date>
    </item>
    <item>
      <title>Re: How is Idempotency ensured for COPY INTO command</title>
      <link>https://community.databricks.com/t5/machine-learning/how-is-idempotency-ensured-for-copy-into-command/m-p/19796#M1084</link>
      <description>&lt;P&gt;The COPY INTO command internally uses a key-value store (RocksDB) to store the details of the input files. This information is stored inside the Delta table log directory and acts like the checkpointing information for a streaming query. The next time a COPY INTO command is triggered on the same table, the data from RocksDB is first loaded and compared against the input files. Under the hood, deduplication logic is applied to ensure idempotency.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;More details here:&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;A href="https://docs.databricks.com/spark/latest/spark-sql/language-manual/delta-copy-into.html" target="_blank"&gt;https://docs.databricks.com/spark/latest/spark-sql/language-manual/delta-copy-into.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;In COPY_OPTIONS, if the force parameter is set to 'true', idempotency is disabled and files are loaded regardless of whether they’ve been loaded before.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 25 Jun 2021 14:02:02 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/how-is-idempotency-ensured-for-copy-into-command/m-p/19796#M1084</guid>
      <dc:creator>brickster_2018</dc:creator>
      <dc:date>2021-06-25T14:02:02Z</dc:date>
    </item>
    <item>
      <title>Re: How is Idempotency ensured for COPY INTO command</title>
      <link>https://community.databricks.com/t5/machine-learning/how-is-idempotency-ensured-for-copy-into-command/m-p/54442#M2777</link>
      <description>&lt;P&gt;How does COPY INTO work with table restore?&lt;/P&gt;&lt;P&gt;I ran some tests, and the restore operation does NOT restore the key-value store entries of the target table to the specified version, which means that the data that came after the chosen version cannot be inserted (unless forced).&lt;/P&gt;&lt;P&gt;Is this behavior intended?&lt;/P&gt;</description>
      <pubDate>Fri, 01 Dec 2023 15:27:10 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/how-is-idempotency-ensured-for-copy-into-command/m-p/54442#M2777</guid>
      <dc:creator>N_M</dc:creator>
      <dc:date>2023-12-01T15:27:10Z</dc:date>
    </item>
  </channel>
</rss>

