<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Overwriting mode do not overwrite in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/overwriting-mode-do-not-overwrite/m-p/77150#M35407</link>
    <description>&lt;P&gt;OK, I get the issue.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;.option(&lt;SPAN class=""&gt;"mergeSchema"&lt;/SPAN&gt;, &lt;SPAN class=""&gt;"true"&lt;/SPAN&gt;)&amp;nbsp;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;is useful for adding more columns, but if you want to reduce the columns in your target Delta table,&lt;/P&gt;&lt;P&gt;then you need&lt;/P&gt;&lt;PRE&gt;.option(&lt;SPAN class=""&gt;"overwriteSchema"&lt;/SPAN&gt;, &lt;SPAN class=""&gt;"true"&lt;/SPAN&gt;)&amp;nbsp;&lt;/PRE&gt;</description>
    <pubDate>Mon, 08 Jul 2024 13:05:15 GMT</pubDate>
    <dc:creator>Enrique1987</dc:creator>
    <dc:date>2024-07-08T13:05:15Z</dc:date>
    <item>
      <title>Overwriting mode do not overwrite</title>
      <link>https://community.databricks.com/t5/data-engineering/overwriting-mode-do-not-overwrite/m-p/77145#M35405</link>
      <description>&lt;P&gt;&lt;SPAN&gt;I have the following code.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;Previously I had a Delta table with&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;180&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;columns in&amp;nbsp;&lt;/SPAN&gt;my_path&lt;SPAN&gt;. I select one column and try to overwrite the table:&lt;/SPAN&gt;&lt;/P&gt;&lt;PRE&gt;  &lt;BR /&gt;    columns_to_select = [&lt;SPAN class=""&gt;"one_column"&lt;/SPAN&gt;]
    df_one_column = df.select(*columns_to_select)
    df_one_column.write.&lt;SPAN class=""&gt;format&lt;/SPAN&gt;(&lt;SPAN class=""&gt;"delta"&lt;/SPAN&gt;).mode(&lt;SPAN class=""&gt;"overwrite"&lt;/SPAN&gt;).option(&lt;SPAN class=""&gt;"mergeSchema"&lt;/SPAN&gt;, &lt;SPAN class=""&gt;"true"&lt;/SPAN&gt;).save(my_path)
    
    new_schema = spark.read.&lt;SPAN class=""&gt;format&lt;/SPAN&gt;(&lt;SPAN class=""&gt;"delta"&lt;/SPAN&gt;).load(my_path).schema
    target_column = [field.name &lt;SPAN class=""&gt;for&lt;/SPAN&gt; field &lt;SPAN class=""&gt;in&lt;/SPAN&gt; new_schema.fields]
    &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;len&lt;/SPAN&gt;(target_column)) &lt;SPAN class=""&gt;# return 180&lt;/SPAN&gt;&amp;nbsp;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;That returns 180 instead of 1. I don't understand why, and neither does ChatGPT 4o,&amp;nbsp; which is why I'm here.&lt;BR /&gt;&lt;BR /&gt;Thanks in advance, Enrique&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 08 Jul 2024 12:07:45 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/overwriting-mode-do-not-overwrite/m-p/77145#M35405</guid>
      <dc:creator>Enrique1987</dc:creator>
      <dc:date>2024-07-08T12:07:45Z</dc:date>
    </item>
    <item>
      <title>Re: Overwriting mode do not overwrite</title>
      <link>https://community.databricks.com/t5/data-engineering/overwriting-mode-do-not-overwrite/m-p/77150#M35407</link>
      <description>&lt;P&gt;OK, I get the issue.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;.option(&lt;SPAN class=""&gt;"mergeSchema"&lt;/SPAN&gt;, &lt;SPAN class=""&gt;"true"&lt;/SPAN&gt;)&amp;nbsp;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;is useful for adding more columns, but if you want to reduce the columns in your target Delta table,&lt;/P&gt;&lt;P&gt;then you need&lt;/P&gt;&lt;PRE&gt;.option(&lt;SPAN class=""&gt;"overwriteSchema"&lt;/SPAN&gt;, &lt;SPAN class=""&gt;"true"&lt;/SPAN&gt;)&amp;nbsp;&lt;/PRE&gt;</description>
      <pubDate>Mon, 08 Jul 2024 13:05:15 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/overwriting-mode-do-not-overwrite/m-p/77150#M35407</guid>
      <dc:creator>Enrique1987</dc:creator>
      <dc:date>2024-07-08T13:05:15Z</dc:date>
    </item>
  </channel>
</rss>

