<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Delta lake schema enforcement allows datatype mismatch on write using MERGE-operation [python] in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/delta-lake-schema-enforcement-allows-datatype-mismatch-on-write/m-p/4924#M1496</link>
    <description>&lt;P&gt;Databricks Runtime: 12.2 LTS, Spark: 3.3.2, Delta Lake: 2.2.0&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;A target table with schema ([c1: integer, c2: integer]) allows us to write into the target table using data with schema ([c1: integer, c2: double]). I expected it to throw an exception (the same as it does using a normal Spark write INSERT operation), but instead it stored the data with a mismatched datatype for field c2.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;from pyspark.sql.types import StructType, StructField, IntegerType, DoubleType
from delta import DeltaTable
  
# Source data
schema = StructType([StructField("c1", IntegerType(), False), StructField("c2", DoubleType(), False)])
rdd_output = spark.sparkContext.parallelize([(4, 1.4), (5, 5.0), (6, 3.5),])
df_source = spark.createDataFrame(rdd_output, schema=schema)
  
# write source to target table using merge
target_table = DeltaTable.forName(spark, "default.test_datatype_misalignment")
merge = target_table.alias("target").merge(df_source.alias("source"), "target.c1 = source.c1")          
merge.whenMatchedUpdateAll().whenNotMatchedInsertAll().execute()
spark.table("default.test_datatype_misalignment").show()
 
# OUTPUT
#+---+---+
#| c1| c2|
#+---+---+
#|  1|  1|
#|  2|  1|
#|  3|  5|
#|  4|  1|
#|  5|  5|
#|  6|  3|
#+---+---+
  
# write source to target table using insert
df_source.write.format("delta").mode("append").saveAsTable("default.test_datatype_misalignment")
 
# OUTPUT
#AnalysisException: Failed to merge fields 'c2' and 'c2'. Failed to merge incompatible data types IntegerType and DoubleType&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;I'm expecting an exception to be raised regardless of the write command. Why is this not the case?&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Tue, 02 May 2023 08:38:52 GMT</pubDate>
    <dc:creator>signo</dc:creator>
    <dc:date>2023-05-02T08:38:52Z</dc:date>
    <item>
      <title>Delta lake schema enforcement allows datatype mismatch on write using MERGE-operation [python]</title>
      <link>https://community.databricks.com/t5/data-engineering/delta-lake-schema-enforcement-allows-datatype-mismatch-on-write/m-p/4924#M1496</link>
      <description>&lt;P&gt;Databricks Runtime: 12.2 LTS, Spark: 3.3.2, Delta Lake: 2.2.0&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;A target table with schema ([c1: integer, c2: integer]) allows us to write into the target table using data with schema ([c1: integer, c2: double]). I expected it to throw an exception (the same as it does using a normal Spark write INSERT operation), but instead it stored the data with a mismatched datatype for field c2.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;from pyspark.sql.types import StructType, StructField, IntegerType, DoubleType
from delta import DeltaTable
  
# Source data
schema = StructType([StructField("c1", IntegerType(), False), StructField("c2", DoubleType(), False)])
rdd_output = spark.sparkContext.parallelize([(4, 1.4), (5, 5.0), (6, 3.5),])
df_source = spark.createDataFrame(rdd_output, schema=schema)
  
# write source to target table using merge
target_table = DeltaTable.forName(spark, "default.test_datatype_misalignment")
merge = target_table.alias("target").merge(df_source.alias("source"), "target.c1 = source.c1")          
merge.whenMatchedUpdateAll().whenNotMatchedInsertAll().execute()
spark.table("default.test_datatype_misalignment").show()
 
# OUTPUT
#+---+---+
#| c1| c2|
#+---+---+
#|  1|  1|
#|  2|  1|
#|  3|  5|
#|  4|  1|
#|  5|  5|
#|  6|  3|
#+---+---+
  
# write source to target table using insert
df_source.write.format("delta").mode("append").saveAsTable("default.test_datatype_misalignment")
 
# OUTPUT
#AnalysisException: Failed to merge fields 'c2' and 'c2'. Failed to merge incompatible data types IntegerType and DoubleType&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;I'm expecting an exception to be raised regardless of the write command. Why is this not the case?&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 02 May 2023 08:38:52 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/delta-lake-schema-enforcement-allows-datatype-mismatch-on-write/m-p/4924#M1496</guid>
      <dc:creator>signo</dc:creator>
      <dc:date>2023-05-02T08:38:52Z</dc:date>
    </item>
    <item>
      <title>Re: Delta lake schema enforcement allows datatype mismatch on write using MERGE-operation [python]</title>
      <link>https://community.databricks.com/t5/data-engineering/delta-lake-schema-enforcement-allows-datatype-mismatch-on-write/m-p/4925#M1497</link>
      <description>&lt;P&gt;Perhaps schema evolution is enabled?&lt;/P&gt;</description>
      <pubDate>Tue, 02 May 2023 11:32:39 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/delta-lake-schema-enforcement-allows-datatype-mismatch-on-write/m-p/4925#M1497</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2023-05-02T11:32:39Z</dc:date>
    </item>
    <item>
      <title>Re: Delta lake schema enforcement allows datatype mismatch on write using MERGE-operation [python]</title>
      <link>https://community.databricks.com/t5/data-engineering/delta-lake-schema-enforcement-allows-datatype-mismatch-on-write/m-p/4927#M1499</link>
      <description>&lt;P&gt;Hi @Sigrun Nordli&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thank you for posting your question in our community! We are happy to assist you.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;To help us provide you with the most accurate information, could you please take a moment to review the responses and select the one that best answers your question?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;This will also help other community members who may have similar questions in the future. Thank you for your participation and let us know if you need any further assistance!&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 19 May 2023 06:31:02 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/delta-lake-schema-enforcement-allows-datatype-mismatch-on-write/m-p/4927#M1499</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-05-19T06:31:02Z</dc:date>
    </item>
  </channel>
</rss>

