<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Is ZORDER required after table overwrite? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/is-zorder-required-after-table-overwrite/m-p/20839#M14113</link>
    <description>&lt;P&gt;The best way to avoid duplicates is to use MERGE statements.&lt;/P&gt;&lt;P&gt;&lt;A href="https://docs.databricks.com/sql/language-manual/delta-merge-into.html" target="_blank"&gt;https://docs.databricks.com/sql/language-manual/delta-merge-into.html&lt;/A&gt;&lt;/P&gt;</description>
    <pubDate>Thu, 24 Nov 2022 14:40:12 GMT</pubDate>
    <dc:creator>yogu</dc:creator>
    <dc:date>2022-11-24T14:40:12Z</dc:date>
    <item>
      <title>Is ZORDER required after table overwrite?</title>
      <link>https://community.databricks.com/t5/data-engineering/is-zorder-required-after-table-overwrite/m-p/20837#M14111</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;After appending new values to a delta table, I need to delete duplicate rows.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;After deleting duplicate rows using PySpark, I overwrite the table (keeping the schema).&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;My question is, do I have to do ZORDER again?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Another question, is there another way to drop duplicates? I tried dropping duplicates using SQL with a CTE, but that didn't work. (Error: Delete is only supported with v2 tables.)&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;# Append new data:
data.write.mode("append").format("delta").saveAsTable("table_name")
&amp;nbsp;
# Read table:
df = spark.sql(f"SELECT * FROM {table_name}")
# Drop Duplicates:
df = df.dropDuplicates(["col1", "col2"])
# Re-write data:
df.write.format("delta").mode("overwrite").option("overwriteSchema", "false").saveAsTable(f"{table_name}")&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 24 Nov 2022 03:05:12 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/is-zorder-required-after-table-overwrite/m-p/20837#M14111</guid>
      <dc:creator>numersoz</dc:creator>
      <dc:date>2022-11-24T03:05:12Z</dc:date>
    </item>
    <item>
      <title>Re: Is ZORDER required after table overwrite?</title>
      <link>https://community.databricks.com/t5/data-engineering/is-zorder-required-after-table-overwrite/m-p/20838#M14112</link>
      <description>&lt;P&gt;Z-Ordering is not triggered by auto-optimize.  So you will have to specifically run it.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;A way to avoid dups is to use merge instead of append.  But it is possible that the runtime will be bigger than the append + dropdups.&lt;/P&gt;</description>
      <pubDate>Thu, 24 Nov 2022 09:55:32 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/is-zorder-required-after-table-overwrite/m-p/20838#M14112</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2022-11-24T09:55:32Z</dc:date>
    </item>
    <item>
      <title>Re: Is ZORDER required after table overwrite?</title>
      <link>https://community.databricks.com/t5/data-engineering/is-zorder-required-after-table-overwrite/m-p/20839#M14113</link>
      <description>&lt;P&gt;The best way to avoid duplicates is to use MERGE statements.&lt;/P&gt;&lt;P&gt;&lt;A href="https://docs.databricks.com/sql/language-manual/delta-merge-into.html" target="_blank"&gt;https://docs.databricks.com/sql/language-manual/delta-merge-into.html&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 24 Nov 2022 14:40:12 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/is-zorder-required-after-table-overwrite/m-p/20839#M14113</guid>
      <dc:creator>yogu</dc:creator>
      <dc:date>2022-11-24T14:40:12Z</dc:date>
    </item>
    <item>
      <title>Re: Is ZORDER required after table overwrite?</title>
      <link>https://community.databricks.com/t5/data-engineering/is-zorder-required-after-table-overwrite/m-p/20840#M14114</link>
      <description>&lt;P&gt;@Werner Stinckens​&amp;nbsp;Thank you!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I am triggering Z-Order after the table is created. But to be sure: is it required to re-do Z-Order after doing an overwrite of the Delta table (with schema overwrite set to false)?&lt;/P&gt;</description>
      <pubDate>Thu, 24 Nov 2022 23:25:30 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/is-zorder-required-after-table-overwrite/m-p/20840#M14114</guid>
      <dc:creator>numersoz</dc:creator>
      <dc:date>2022-11-24T23:25:30Z</dc:date>
    </item>
    <item>
      <title>Re: Is ZORDER required after table overwrite?</title>
      <link>https://community.databricks.com/t5/data-engineering/is-zorder-required-after-table-overwrite/m-p/20841#M14115</link>
      <description>&lt;P&gt;Z-ordering is never required; that also applies if you overwrite a Z-ordered table.&lt;/P&gt;&lt;P&gt;The only thing that will happen is that the newly written data is not Z-ordered.&lt;/P&gt;</description>
      <pubDate>Fri, 25 Nov 2022 09:38:40 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/is-zorder-required-after-table-overwrite/m-p/20841#M14115</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2022-11-25T09:38:40Z</dc:date>
    </item>
    <item>
      <title>Re: Is ZORDER required after table overwrite?</title>
      <link>https://community.databricks.com/t5/data-engineering/is-zorder-required-after-table-overwrite/m-p/20842#M14116</link>
      <description>&lt;P&gt;Hi @Nurettin Ersoz​&amp;nbsp;&lt;/P&gt;&lt;P&gt;Try to use an incremental load of data so that it avoids duplicates; you can use a full load once if you have updates in your data.&lt;/P&gt;</description>
      <pubDate>Sun, 27 Nov 2022 13:30:50 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/is-zorder-required-after-table-overwrite/m-p/20842#M14116</guid>
      <dc:creator>DeepakMakwana74</dc:creator>
      <dc:date>2022-11-27T13:30:50Z</dc:date>
    </item>
  </channel>
</rss>

