<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Issues with Common Data Model as Source -  different column size for blobs in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/issues-with-common-data-model-as-source-different-column-size/m-p/72752#M34596</link>
    <description>&lt;P&gt;This is a thoughtful consideration, but have you considered using&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;.option("mergeSchema", "true")&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;when writing?&lt;BR /&gt;Do keep in mind that this will affect the target table and possible downstream consumers. Ideally you want to have a strict schema contract with your data suppliers to avoid these issues. You can also consider dynamically creating the model.json based on the file headers you're receiving.&lt;/P&gt;</description>
    <pubDate>Wed, 12 Jun 2024 09:35:20 GMT</pubDate>
    <dc:creator>jacovangelder</dc:creator>
    <dc:date>2024-06-12T09:35:20Z</dc:date>
    <item>
      <title>Issues with Common Data Model as Source -  different column size for blobs</title>
      <link>https://community.databricks.com/t5/data-engineering/issues-with-common-data-model-as-source-different-column-size/m-p/72750#M34595</link>
      <description>&lt;P&gt;I have a Dataverse Synapse link set up to extract data into ADLS gen2. I am trying to connect ADLS gen2 as the data source to read the data files in Databricks. I have CDC enabled for CDM Data with the partition of Year and Month.&lt;BR /&gt;So, for example, if I am receiving 10 files for the table Product, the problem is that the column size varies for the different CSV files received and there is only a single model.json for them. This is causing issues when reading the data with respect to column variation while reading the schema.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 12 Jun 2024 08:17:07 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/issues-with-common-data-model-as-source-different-column-size/m-p/72750#M34595</guid>
      <dc:creator>medha</dc:creator>
      <dc:date>2024-06-12T08:17:07Z</dc:date>
    </item>
    <item>
      <title>Re: Issues with Common Data Model as Source -  different column size for blobs</title>
      <link>https://community.databricks.com/t5/data-engineering/issues-with-common-data-model-as-source-different-column-size/m-p/72752#M34596</link>
      <description>&lt;P&gt;This is a thoughtful consideration, but have you considered using&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;.option("mergeSchema", "true")&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;when writing?&lt;BR /&gt;Do keep in mind that this will affect the target table and possible downstream consumers. Ideally you want to have a strict schema contract with your data suppliers to avoid these issues. You can also consider dynamically creating the model.json based on the file headers you're receiving.&lt;/P&gt;</description>
      <pubDate>Wed, 12 Jun 2024 09:35:20 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/issues-with-common-data-model-as-source-different-column-size/m-p/72752#M34596</guid>
      <dc:creator>jacovangelder</dc:creator>
      <dc:date>2024-06-12T09:35:20Z</dc:date>
    </item>
  </channel>
</rss>

