<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Schema Parsing issue when datatype of source field is mapped incorrect in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/schema-parsing-issue-when-datatype-of-source-field-is-mapped/m-p/31493#M22931</link>
    <description>&lt;P&gt;Thanks Hubert! I did have Autoloader as one of the solutions and I think this is a viable option to make sure I do not have schema parsing issues.&lt;/P&gt;</description>
    <pubDate>Wed, 19 Jan 2022 01:19:47 GMT</pubDate>
    <dc:creator>MattM</dc:creator>
    <dc:date>2022-01-19T01:19:47Z</dc:date>
    <item>
      <title>Schema Parsing issue when datatype of source field is mapped incorrect</title>
      <link>https://community.databricks.com/t5/data-engineering/schema-parsing-issue-when-datatype-of-source-field-is-mapped/m-p/31490#M22928</link>
      <description>&lt;P&gt;I have a complex JSON file which has a massive struct column. We regularly have issues when we try to parse this JSON file by forming our case class to extract the fields from the schema. With this approach, the issue we are facing is that if the data type of one field within the case class is incorrect, the rest of the following fields in that class do not populate in the target. Hope the problem makes sense.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Is there any alternative way? One I can think of is to extract all the fields as strings from the JSON file and then do the data type conversion. This adds an extra step. A better solution is appreciated. Thanks.&lt;/P&gt;</description>
      <pubDate>Mon, 17 Jan 2022 23:04:29 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/schema-parsing-issue-when-datatype-of-source-field-is-mapped/m-p/31490#M22928</guid>
      <dc:creator>MattM</dc:creator>
      <dc:date>2022-01-17T23:04:29Z</dc:date>
    </item>
    <item>
      <title>Re: Schema Parsing issue when datatype of source field is mapped incorrect</title>
      <link>https://community.databricks.com/t5/data-engineering/schema-parsing-issue-when-datatype-of-source-field-is-mapped/m-p/31492#M22930</link>
      <description>&lt;P&gt;I think the solution for your problem is to use an Auto Loader stream to read the data, as it supports schema hints. If you don't want to run it as a continuous stream, it is enough to specify trigger once there (so once all the JSON files are loaded it will finish the job).&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Here is the documentation about loading JSON:&lt;/P&gt;&lt;P&gt;&lt;A href="https://docs.databricks.com/spark/latest/structured-streaming/auto-loader-json.html" target="test_blank"&gt;https://docs.databricks.com/spark/latest/structured-streaming/auto-loader-json.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Then you can specify schema hints:&lt;/P&gt;&lt;P&gt;&lt;A href="https://docs.databricks.com/spark/latest/structured-streaming/auto-loader-schema.html#schema-hints" target="test_blank"&gt;https://docs.databricks.com/spark/latest/structured-streaming/auto-loader-schema.html#schema-hints&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Additionally, you can experiment with different schema evolution options for the stream.&lt;/P&gt;</description>
      <pubDate>Tue, 18 Jan 2022 21:11:46 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/schema-parsing-issue-when-datatype-of-source-field-is-mapped/m-p/31492#M22930</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2022-01-18T21:11:46Z</dc:date>
    </item>
    <item>
      <title>Re: Schema Parsing issue when datatype of source field is mapped incorrect</title>
      <link>https://community.databricks.com/t5/data-engineering/schema-parsing-issue-when-datatype-of-source-field-is-mapped/m-p/31493#M22931</link>
      <description>&lt;P&gt;Thanks Hubert! I did have Autoloader as one of the solutions and I think this is a viable option to make sure I do not have schema parsing issues.&lt;/P&gt;</description>
      <pubDate>Wed, 19 Jan 2022 01:19:47 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/schema-parsing-issue-when-datatype-of-source-field-is-mapped/m-p/31493#M22931</guid>
      <dc:creator>MattM</dc:creator>
      <dc:date>2022-01-19T01:19:47Z</dc:date>
    </item>
    <item>
      <title>Re: Schema Parsing issue when datatype of source field is mapped incorrect</title>
      <link>https://community.databricks.com/t5/data-engineering/schema-parsing-issue-when-datatype-of-source-field-is-mapped/m-p/31494#M22932</link>
      <description>&lt;P&gt;Hey there, @Matt M&amp;nbsp;- If @Hubert Dudek's response solved the issue, would you be happy to mark his answer as best? It helps other members find the solution more quickly.&lt;/P&gt;</description>
      <pubDate>Wed, 19 Jan 2022 16:43:13 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/schema-parsing-issue-when-datatype-of-source-field-is-mapped/m-p/31494#M22932</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2022-01-19T16:43:13Z</dc:date>
    </item>
    <item>
      <title>Re: Schema Parsing issue when datatype of source field is mapped incorrect</title>
      <link>https://community.databricks.com/t5/data-engineering/schema-parsing-issue-when-datatype-of-source-field-is-mapped/m-p/31495#M22933</link>
      <description>&lt;P&gt;Yes, thanks.&lt;/P&gt;</description>
      <pubDate>Thu, 20 Jan 2022 15:23:11 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/schema-parsing-issue-when-datatype-of-source-field-is-mapped/m-p/31495#M22933</guid>
      <dc:creator>MattM</dc:creator>
      <dc:date>2022-01-20T15:23:11Z</dc:date>
    </item>
  </channel>
</rss>

