<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Declarative Pipelines: set Merge Schema to False in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140753#M51521</link>
    <description>&lt;P&gt;Hi&amp;nbsp; &lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/110209"&gt;@a_user12&lt;/a&gt;&amp;nbsp;&amp;nbsp;&lt;SPAN&gt;DLT is designed to automatically evolve the schema of tables as your pipeline logic changes. If your code returns a DataFrame with new columns, DLT will add those columns to the table automatically. There is no built-in option to prevent this or to enforce a fixed schema.&lt;BR /&gt;1) you can try enforcing&amp;nbsp;&lt;STRONG&gt;explicit schema definitions in your pipeline code??&lt;/STRONG&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;also try ,&amp;nbsp;&lt;SPAN&gt;Disabling schema auto merge globally&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;&lt;!--   ScriptorStartFragment   --&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;2) spark.conf.set("spark.databricks.delta.schema.autoMerge.enabled", "false")&lt;!--   ScriptorEndFragment   --&gt;&lt;/SPAN&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;DIV class=""&gt;example :&lt;DIV class=""&gt;1)&amp;nbsp;&lt;DIV class=""&gt;&lt;P&gt;&lt;SPAN&gt;&lt;!--   ScriptorStartFragment   --&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;# Define schema for payload&lt;/SPAN&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;payload_schema = StructType([&lt;/SPAN&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; StructField("field1", StringType(), True),&lt;/SPAN&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; StructField("field2", IntegerType(), True)&lt;/SPAN&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;])&lt;!--   ScriptorEndFragment   --&gt;&lt;/SPAN&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;DIV class=""&gt;&lt;SPAN&gt;&lt;!--   ScriptorStartFragment   --&gt;.withColumn("payload", from_json(col("payload"), payload_schema))&lt;!--   ScriptorEndFragment   --&gt;&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;P&gt;&lt;a 
href="https://community.databricks.com/t5/user/viewprofilepage/user-id/110209"&gt;@a_user12&lt;/a&gt;&lt;/P&gt;</description>
    <pubDate>Mon, 01 Dec 2025 15:12:22 GMT</pubDate>
    <dc:creator>saurabh18cs</dc:creator>
    <dc:date>2025-12-01T15:12:22Z</dc:date>
    <item>
      <title>Declarative Pipelines: set Merge Schema to False</title>
      <link>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140644#M51497</link>
      <description>&lt;P&gt;Dear Team!&lt;/P&gt;&lt;P&gt;I want to prevent the schema of a certain table from being updated automatically. With plain Structured Streaming I can do the following:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;silver_df.writeStream \
    .format("delta") \
    .option("mergeSchema", "false") \
    .option("checkpointLocation", checkpoint_path) \
    .outputMode("append") \
    .table("silver_table")&lt;/LI-CODE&gt;&lt;P&gt;How can I set mergeSchema=false with Declarative Pipelines?&lt;/P&gt;</description>
      <pubDate>Sun, 30 Nov 2025 13:00:30 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140644#M51497</guid>
      <dc:creator>a_user12</dc:creator>
      <dc:date>2025-11-30T13:00:30Z</dc:date>
    </item>
    <item>
      <title>Re: Declarative Pipelines: set Merge Schema to False</title>
      <link>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140651#M51499</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/110209"&gt;@a_user12&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;Did you try something like in a link below? Of course in your case you want to set it to "false":&lt;/P&gt;&lt;P&gt;&lt;A href="https://medium.com/@infinitylearnings1201/h-learn-data-engineering-databricks-delta-live-table-56dd1d9b66ac" target="_blank"&gt;H Learn Data Engineering: Databricks Delta Live Table | by THE BRICK LEARNING | Medium&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="szymon_dybczak_0-1764517519244.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/22001iAC3C2E8B24FA2EEA/image-size/medium?v=v2&amp;amp;px=400" role="button" title="szymon_dybczak_0-1764517519244.png" alt="szymon_dybczak_0-1764517519244.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 30 Nov 2025 15:46:16 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140651#M51499</guid>
      <dc:creator>szymon_dybczak</dc:creator>
      <dc:date>2025-11-30T15:46:16Z</dc:date>
    </item>
    <item>
      <title>Re: Declarative Pipelines: set Merge Schema to False</title>
      <link>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140663#M51502</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/110502"&gt;@szymon_dybczak&lt;/a&gt;&amp;nbsp;&amp;nbsp;- thank you for your response&amp;nbsp;&lt;/P&gt;&lt;P&gt;I tried:&lt;/P&gt;&lt;DIV&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;LI-CODE lang="python"&gt;@dlt.table(
    name="deserialized",
    comment="Raw messages from Kafka topic as JSON",
    table_properties={
        "pipelines.autoOptimize.managed": "true",
        "pipelines.autoCompact.managed": "true"
    }
)
def deserialize():
    # Read from Kafka
    return spark.readStream \
            .table("stringified") \
            .withColumn("payload", from_json(col("payload"),None,{"schemaLocationKey": "x"})) \
            .select("topic","timestamp","payload") \
            .withColumn("new-x",lit("foo"))
    



@dlt.table(
    name="enriched_table",
    table_properties={
        "pipelines.autoOptimize.managed": "true",
        "pipelines.autoCompact.managed": "true"
    }
)
def enriched_table():
    return spark.readStream.option("mergeSchema","false").table("deserialized")
        #.withColumn("new",lit("new"))  # ensure columns match exactly    &lt;/LI-CODE&gt;&lt;P&gt;&lt;SPAN&gt;I would expect that if the column "new-x" does not yet exist in the table "enriched_table", I get an error. Instead, it simply adds the new column to "enriched_table".&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sun, 30 Nov 2025 19:54:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140663#M51502</guid>
      <dc:creator>a_user12</dc:creator>
      <dc:date>2025-11-30T19:54:05Z</dc:date>
    </item>
    <item>
      <title>Re: Declarative Pipelines: set Merge Schema to False</title>
      <link>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140753#M51521</link>
      <description>&lt;P&gt;Hi&amp;nbsp; &lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/110209"&gt;@a_user12&lt;/a&gt;&amp;nbsp;&amp;nbsp;&lt;SPAN&gt;DLT is designed to automatically evolve the schema of tables as your pipeline logic changes. If your code returns a DataFrame with new columns, DLT will add those columns to the table automatically. There is no built-in option to prevent this or to enforce a fixed schema.&lt;BR /&gt;1) you can try enforcing&amp;nbsp;&lt;STRONG&gt;explicit schema definitions in your pipeline code??&lt;/STRONG&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;also try ,&amp;nbsp;&lt;SPAN&gt;Disabling schema auto merge globally&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;&lt;!--   ScriptorStartFragment   --&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;2) spark.conf.set("spark.databricks.delta.schema.autoMerge.enabled", "false")&lt;!--   ScriptorEndFragment   --&gt;&lt;/SPAN&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;DIV class=""&gt;example :&lt;DIV class=""&gt;1)&amp;nbsp;&lt;DIV class=""&gt;&lt;P&gt;&lt;SPAN&gt;&lt;!--   ScriptorStartFragment   --&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;# Define schema for payload&lt;/SPAN&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;payload_schema = StructType([&lt;/SPAN&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; StructField("field1", StringType(), True),&lt;/SPAN&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; StructField("field2", IntegerType(), True)&lt;/SPAN&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;])&lt;!--   ScriptorEndFragment   --&gt;&lt;/SPAN&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;DIV class=""&gt;&lt;SPAN&gt;&lt;!--   ScriptorStartFragment   --&gt;.withColumn("payload", from_json(col("payload"), payload_schema))&lt;!--   ScriptorEndFragment   --&gt;&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;P&gt;&lt;a 
href="https://community.databricks.com/t5/user/viewprofilepage/user-id/110209"&gt;@a_user12&lt;/a&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 01 Dec 2025 15:12:22 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140753#M51521</guid>
      <dc:creator>saurabh18cs</dc:creator>
      <dc:date>2025-12-01T15:12:22Z</dc:date>
    </item>
    <item>
      <title>Re: Declarative Pipelines: set Merge Schema to False</title>
      <link>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140757#M51523</link>
      <description>&lt;P&gt;BETTER MODIFIED RESPONSE&lt;/P&gt;&lt;P&gt;Hi&amp;nbsp; &lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/110209"&gt;@a_user12&lt;/a&gt;&amp;nbsp;&amp;nbsp;&lt;SPAN&gt;DLT is designed to automatically evolve the schema of tables as your pipeline logic changes. If your code returns a DataFrame with new columns, DLT will add those columns to the table automatically. There is no built-in option to prevent this or to enforce a fixed schema. However, in your case you are adding the option during readStream. mergeSchema on &lt;STRONG&gt;readStream&lt;/STRONG&gt; has &lt;STRONG&gt;no effect&lt;/STRONG&gt; on whether downstream &lt;STRONG&gt;tables evolve&lt;/STRONG&gt;. Schema evolution happens on &lt;STRONG&gt;write&lt;/STRONG&gt; (or when DLT materializes a table)&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;&lt;BR /&gt;1) you can try enforcing&amp;nbsp;&lt;STRONG&gt;explicit schema definitions in your pipeline code?? OPTIONAL&lt;/STRONG&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;also try ,&amp;nbsp;&lt;SPAN&gt;Disabling schema auto merge globally (if adding to your write is not working)&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;&lt;!--    ScriptorStartFragment    --&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;2) spark.conf.set("spark.databricks.delta.schema.autoMerge.enabled", "false")&lt;!--    ScriptorEndFragment    --&gt;&lt;/SPAN&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;DIV class=""&gt;example :&lt;DIV class=""&gt;1)&amp;nbsp;&lt;DIV class=""&gt;&lt;P&gt;&lt;SPAN&gt;&lt;!--    ScriptorStartFragment    --&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;# Define schema for payload&lt;/SPAN&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;payload_schema = StructType([&lt;/SPAN&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; StructField("field1", StringType(), True),&lt;/SPAN&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; StructField("field2", IntegerType(), True)&lt;/SPAN&gt;&lt;DIV 
class=""&gt;&lt;SPAN&gt;])&lt;!--    ScriptorEndFragment    --&gt;&lt;/SPAN&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;DIV class=""&gt;&lt;SPAN&gt;&lt;!--    ScriptorStartFragment    --&gt;.withColumn("payload", from_json(col("payload"), payload_schema))&lt;!--    ScriptorEndFragment    --&gt;&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/110209"&gt;@a_user12&lt;/a&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 01 Dec 2025 15:50:29 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140757#M51523</guid>
      <dc:creator>saurabh18cs</dc:creator>
      <dc:date>2025-12-01T15:50:29Z</dc:date>
    </item>
    <item>
      <title>Re: Declarative Pipelines: set Merge Schema to False</title>
      <link>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140818#M51539</link>
      <description>&lt;P&gt;I am aware of&amp;nbsp;&lt;SPAN&gt;from_json but I want to prevent schema changes on the delta table itself&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 02 Dec 2025 06:31:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140818#M51539</guid>
      <dc:creator>a_user12</dc:creator>
      <dc:date>2025-12-02T06:31:48Z</dc:date>
    </item>
    <item>
      <title>Re: Declarative Pipelines: set Merge Schema to False</title>
      <link>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140965#M51592</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/110209"&gt;@a_user12&lt;/a&gt;&amp;nbsp;can you try the following:&lt;/P&gt;&lt;P&gt;ALTER TABLE &amp;lt;table_name&amp;gt; SET TBLPROPERTIES ('delta.minReaderVersion' = '2', 'delta.minWriterVersion' = '5', 'delta.columnMapping.mode' = 'name', 'delta.schema.autoMerge.enabled' = 'false');&lt;/P&gt;</description>
      <pubDate>Wed, 03 Dec 2025 08:09:49 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140965#M51592</guid>
      <dc:creator>saurabh18cs</dc:creator>
      <dc:date>2025-12-03T08:09:49Z</dc:date>
    </item>
    <item>
      <title>Re: Declarative Pipelines: set Merge Schema to False</title>
      <link>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140976#M51594</link>
      <description>&lt;P&gt;It is automatic in DLT. If there are significant schema changes, you need to run a full refresh. Maybe consider storing everything (the whole JSON) in a single VARIANT column and unpacking only what is necessary later - this way you will have it under control.&lt;/P&gt;</description>
      <pubDate>Wed, 03 Dec 2025 09:16:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/declarative-pipelines-set-merge-schema-to-false/m-p/140976#M51594</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2025-12-03T09:16:05Z</dc:date>
    </item>
  </channel>
</rss>

