<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Auto Loader fails when reading json element containing space in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/auto-loader-fails-when-reading-json-element-containing-space/m-p/37364#M26334</link>
    <description>&lt;P&gt;I'm using Auto Loader as part of a Delta Live Tables pipeline to ingest json files, and today it failed with this error message:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;com.databricks.sql.transaction.tahoe.DeltaAnalysisException: Found invalid character(s) among ' ,;{}()\n\t=' in the column names of your schema. 
org.apache.spark.sql.AnalysisException:  Column name "NotificationSettings.element.Microsoft Teams" contains invalid character(s). Please use alias to rename it.&lt;/LI-CODE&gt;&lt;P&gt;The failing json file contains an element named "Microsoft Teams" which causes the pipeline to fail. How can I handle such elements? The error message mentions the use of an alias, but I can't find any info on how to implement this.&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;		"NotificationSettings": [
			{
				"NotificationType": "MissedActivityReminder",
				"Microsoft Teams": true
			},
...&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Tue, 11 Jul 2023 09:42:37 GMT</pubDate>
    <dc:creator>Magnus</dc:creator>
    <dc:date>2023-07-11T09:42:37Z</dc:date>
    <item>
      <title>Auto Loader fails when reading json element containing space</title>
      <link>https://community.databricks.com/t5/data-engineering/auto-loader-fails-when-reading-json-element-containing-space/m-p/37364#M26334</link>
      <description>&lt;P&gt;I'm using Auto Loader as part of a Delta Live Tables pipeline to ingest json files, and today it failed with this error message:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;com.databricks.sql.transaction.tahoe.DeltaAnalysisException: Found invalid character(s) among ' ,;{}()\n\t=' in the column names of your schema. 
org.apache.spark.sql.AnalysisException:  Column name "NotificationSettings.element.Microsoft Teams" contains invalid character(s). Please use alias to rename it.&lt;/LI-CODE&gt;&lt;P&gt;The failing json file contains an element named "Microsoft Teams" which causes the pipeline to fail. How can I handle such elements? The error message mentions the use of an alias, but I can't find any info on how to implement this.&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;		"NotificationSettings": [
			{
				"NotificationType": "MissedActivityReminder",
				"Microsoft Teams": true
			},
...&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 11 Jul 2023 09:42:37 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/auto-loader-fails-when-reading-json-element-containing-space/m-p/37364#M26334</guid>
      <dc:creator>Magnus</dc:creator>
      <dc:date>2023-07-11T09:42:37Z</dc:date>
    </item>
    <item>
      <title>Re: Auto Loader fails when reading json element containing space</title>
      <link>https://community.databricks.com/t5/data-engineering/auto-loader-fails-when-reading-json-element-containing-space/m-p/37584#M26391</link>
      <description>&lt;P&gt;Please check if this helps:&lt;/P&gt;&lt;P&gt;&lt;A href="https://kb.databricks.com/en_US/delta/allow-spaces-and-special-characters-in-nested-column-names-with-delta-tables" target="_blank"&gt;https://kb.databricks.com/en_US/delta/allow-spaces-and-special-characters-in-nested-column-names-with-delta-tables&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 13 Jul 2023 16:52:22 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/auto-loader-fails-when-reading-json-element-containing-space/m-p/37584#M26391</guid>
      <dc:creator>Lakshay</dc:creator>
      <dc:date>2023-07-13T16:52:22Z</dc:date>
    </item>
    <item>
      <title>Re: Auto Loader fails when reading json element containing space</title>
      <link>https://community.databricks.com/t5/data-engineering/auto-loader-fails-when-reading-json-element-containing-space/m-p/37611#M26398</link>
      <description>&lt;P&gt;I could not get the DeltaTable solution to work in combination with Auto Loader/DLT/Unity Catalog, since it expects a table location and I want the framework to handle that.&lt;/P&gt;&lt;P&gt;I also tried&amp;nbsp;&lt;SPAN&gt;withColumnRenamed, but I can't get it to work either. I still get the error message shown in my original question.&lt;/SPAN&gt;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;withColumnRenamed("NotificationSettings.Microsoft Teams", "MicrosoftTeams")&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 14 Jul 2023 07:36:49 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/auto-loader-fails-when-reading-json-element-containing-space/m-p/37611#M26398</guid>
      <dc:creator>Magnus</dc:creator>
      <dc:date>2023-07-14T07:36:49Z</dc:date>
    </item>
    <item>
      <title>Re: Auto Loader fails when reading json element containing space</title>
      <link>https://community.databricks.com/t5/data-engineering/auto-loader-fails-when-reading-json-element-containing-space/m-p/37622#M26402</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/72734"&gt;@Magnus&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;You can read the input file using Pandas or Koalas (&lt;A href="https://koalas.readthedocs.io/en/latest/index.html" target="_blank" rel="noopener"&gt;https://koalas.readthedocs.io/en/latest/index.html&lt;/A&gt;)&lt;/LI&gt;&lt;LI&gt;then rename the columns&lt;/LI&gt;&lt;LI&gt;then convert the Pandas/Koalas dataframe to Spark dataframe. You can write it back with the correct column name, so the next time you use it, the error will not happen.&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 14 Jul 2023 11:02:59 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/auto-loader-fails-when-reading-json-element-containing-space/m-p/37622#M26402</guid>
      <dc:creator>Tharun-Kumar</dc:creator>
      <dc:date>2023-07-14T11:02:59Z</dc:date>
    </item>
  </channel>
</rss>

