<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: concurrent update to same hive or deltalake table in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/concurrent-update-to-same-hive-or-deltalake-table/m-p/32034#M23354</link>
    <description>&lt;P&gt;Hi&lt;/P&gt;&lt;P&gt;Thanks for your answer.  &lt;/P&gt;&lt;P&gt;I found the deltalake on s3 has the following warning on the aws page.  &lt;/P&gt;&lt;P&gt;"Warning&lt;/P&gt;&lt;P&gt;Concurrent writes to the same Delta table from multiple Spark drivers can lead to data loss." &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;For single driver with multiple executors, will concurrent write to the same table be an issue as well? &lt;/P&gt;</description>
    <pubDate>Fri, 14 Jan 2022 01:00:47 GMT</pubDate>
    <dc:creator>Autel</dc:creator>
    <dc:date>2022-01-14T01:00:47Z</dc:date>
    <item>
      <title>concurrent update to same hive or deltalake table</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-update-to-same-hive-or-deltalake-table/m-p/32031#M23351</link>
      <description>&lt;P&gt;Hi, &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I'm interested to know what happens if multiple executors append to the same Hive table using saveAsTable or insertInto in Spark SQL. Will that cause any data corruption? What configuration do I need to enable concurrent writes to the same Hive table? &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;What about the same question for Delta Lake? &lt;/P&gt;</description>
      <pubDate>Sun, 09 Jan 2022 05:31:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-update-to-same-hive-or-deltalake-table/m-p/32031#M23351</guid>
      <dc:creator>Autel</dc:creator>
      <dc:date>2022-01-09T05:31:05Z</dc:date>
    </item>
    <item>
      <title>Re: concurrent update to same hive or deltalake table</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-update-to-same-hive-or-deltalake-table/m-p/32032#M23352</link>
      <description>&lt;P&gt;A Hive table will not handle this well, as the underlying data is in Parquet format, which is not ACID compliant.&lt;/P&gt;&lt;P&gt;Delta Lake, however, is:&lt;/P&gt;&lt;P&gt;&lt;A href="https://docs.delta.io/0.5.0/concurrency-control.html" alt="https://docs.delta.io/0.5.0/concurrency-control.html" target="_blank"&gt;https://docs.delta.io/0.5.0/concurrency-control.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;There you can see that inserts do not cause conflicts.&lt;/P&gt;</description>
      <pubDate>Mon, 10 Jan 2022 09:21:46 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-update-to-same-hive-or-deltalake-table/m-p/32032#M23352</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2022-01-10T09:21:46Z</dc:date>
    </item>
    <item>
      <title>Re: concurrent update to same hive or deltalake table</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-update-to-same-hive-or-deltalake-table/m-p/32034#M23354</link>
      <description>&lt;P&gt;Hi&lt;/P&gt;&lt;P&gt;Thanks for your answer.  &lt;/P&gt;&lt;P&gt;I found the deltalake on s3 has the following warning on the aws page.  &lt;/P&gt;&lt;P&gt;"Warning&lt;/P&gt;&lt;P&gt;Concurrent writes to the same Delta table from multiple Spark drivers can lead to data loss." &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;For single driver with multiple executors, will concurrent write to the same table be an issue as well? &lt;/P&gt;</description>
      <pubDate>Fri, 14 Jan 2022 01:00:47 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-update-to-same-hive-or-deltalake-table/m-p/32034#M23354</guid>
      <dc:creator>Autel</dc:creator>
      <dc:date>2022-01-14T01:00:47Z</dc:date>
    </item>
    <item>
      <title>Re: concurrent update to same hive or deltalake table</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-update-to-same-hive-or-deltalake-table/m-p/32035#M23355</link>
      <description>&lt;P&gt;No, because that is how Spark works.&lt;/P&gt;&lt;P&gt;The driver decides which worker writes what and keeps track of what is going on.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;That is also why multiple drivers (read: multiple Spark programs) can cause conflicts: the drivers do not know what the others are doing.&lt;/P&gt;</description>
      <pubDate>Fri, 14 Jan 2022 08:03:26 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-update-to-same-hive-or-deltalake-table/m-p/32035#M23355</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2022-01-14T08:03:26Z</dc:date>
    </item>
  </channel>
</rss>

