<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Process single data set with different JSON schema rows using Pyspark in databricks in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/process-single-data-set-with-different-json-schema-rows-using/m-p/81726#M36399</link>
    <description>&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Hi,&lt;BR /&gt;I am getting data from Event Hub and storing it in a Delta table as a row table. The data I receive is in JSON. The problem is that the data has a different schema in each row, but the code I use takes the first row as the JSON schema. I am stuck on how to handle this; could anyone please guide me?&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; pyspark.sql.functions &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; explode, from_json, col,schema_of_json&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; pyspark.sql.types &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; ArrayType, StringType&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; pyspark.sql &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; functions &lt;/SPAN&gt;&lt;SPAN&gt;as&lt;/SPAN&gt;&lt;SPAN&gt; F&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; pyspark.sql.types &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt; &lt;SPAN&gt;*&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;sample_json &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; df.&lt;/SPAN&gt;&lt;SPAN&gt;select&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"Value"&lt;/SPAN&gt;&lt;SPAN&gt;).&lt;/SPAN&gt;&lt;SPAN&gt;first&lt;/SPAN&gt;&lt;SPAN&gt;()[&lt;/SPAN&gt;&lt;SPAN&gt;0&lt;/SPAN&gt;&lt;SPAN&gt;]&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;json_schema &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt; &lt;SPAN&gt;schema_of_json&lt;/SPAN&gt;&lt;SPAN&gt;(sample_json)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;print&lt;/SPAN&gt;&lt;SPAN&gt;(json_schema)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;value_df &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; (&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; df.&lt;/SPAN&gt;&lt;SPAN&gt;select&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; 
&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;from_json&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;col&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"Value"&lt;/SPAN&gt;&lt;SPAN&gt;).&lt;/SPAN&gt;&lt;SPAN&gt;cast&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"string"&lt;/SPAN&gt;&lt;SPAN&gt;), json_schema).&lt;/SPAN&gt;&lt;SPAN&gt;alias&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"analytics"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; )&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; .&lt;/SPAN&gt;&lt;SPAN&gt;select&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;explode&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;col&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"analytics"&lt;/SPAN&gt;&lt;SPAN&gt;)).&lt;/SPAN&gt;&lt;SPAN&gt;alias&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"Value"&lt;/SPAN&gt;&lt;SPAN&gt;))&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;).&lt;/SPAN&gt;&lt;SPAN&gt;select&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"Value.customer_id"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;value_df.&lt;/SPAN&gt;&lt;SPAN&gt;display&lt;/SPAN&gt;&lt;SPAN&gt;()&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="ahmed_zarar_0-1722683168135.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/10120iAB5F7AAE6A265C5E/image-size/medium/is-moderation-mode/true?v=v2&amp;amp;px=400" role="button" title="ahmed_zarar_0-1722683168135.png" alt="ahmed_zarar_0-1722683168135.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Sat, 03 Aug 2024 11:30:19 GMT</pubDate>
    <dc:creator>ahmed_zarar</dc:creator>
    <dc:date>2024-08-03T11:30:19Z</dc:date>
    <item>
      <title>Process single data set with different JSON schema rows using Pyspark in databricks</title>
      <link>https://community.databricks.com/t5/data-engineering/process-single-data-set-with-different-json-schema-rows-using/m-p/81726#M36399</link>
      <description>&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Hi,&lt;BR /&gt;I am getting data from Event Hub and storing it in a Delta table as a row table. The data I receive is in JSON. The problem is that the data has a different schema in each row, but the code I use takes the first row as the JSON schema. I am stuck on how to handle this; could anyone please guide me?&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; pyspark.sql.functions &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; explode, from_json, col,schema_of_json&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; pyspark.sql.types &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; ArrayType, StringType&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; pyspark.sql &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; functions &lt;/SPAN&gt;&lt;SPAN&gt;as&lt;/SPAN&gt;&lt;SPAN&gt; F&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; pyspark.sql.types &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt; &lt;SPAN&gt;*&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;sample_json &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; df.&lt;/SPAN&gt;&lt;SPAN&gt;select&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"Value"&lt;/SPAN&gt;&lt;SPAN&gt;).&lt;/SPAN&gt;&lt;SPAN&gt;first&lt;/SPAN&gt;&lt;SPAN&gt;()[&lt;/SPAN&gt;&lt;SPAN&gt;0&lt;/SPAN&gt;&lt;SPAN&gt;]&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;json_schema &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt; &lt;SPAN&gt;schema_of_json&lt;/SPAN&gt;&lt;SPAN&gt;(sample_json)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;print&lt;/SPAN&gt;&lt;SPAN&gt;(json_schema)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;value_df &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; (&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; df.&lt;/SPAN&gt;&lt;SPAN&gt;select&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; 
&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;from_json&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;col&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"Value"&lt;/SPAN&gt;&lt;SPAN&gt;).&lt;/SPAN&gt;&lt;SPAN&gt;cast&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"string"&lt;/SPAN&gt;&lt;SPAN&gt;), json_schema).&lt;/SPAN&gt;&lt;SPAN&gt;alias&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"analytics"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; )&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; .&lt;/SPAN&gt;&lt;SPAN&gt;select&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;explode&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;col&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"analytics"&lt;/SPAN&gt;&lt;SPAN&gt;)).&lt;/SPAN&gt;&lt;SPAN&gt;alias&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"Value"&lt;/SPAN&gt;&lt;SPAN&gt;))&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;).&lt;/SPAN&gt;&lt;SPAN&gt;select&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"Value.customer_id"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;value_df.&lt;/SPAN&gt;&lt;SPAN&gt;display&lt;/SPAN&gt;&lt;SPAN&gt;()&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="ahmed_zarar_0-1722683168135.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/10120iAB5F7AAE6A265C5E/image-size/medium/is-moderation-mode/true?v=v2&amp;amp;px=400" role="button" title="ahmed_zarar_0-1722683168135.png" alt="ahmed_zarar_0-1722683168135.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 03 Aug 2024 11:30:19 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/process-single-data-set-with-different-json-schema-rows-using/m-p/81726#M36399</guid>
      <dc:creator>ahmed_zarar</dc:creator>
      <dc:date>2024-08-03T11:30:19Z</dc:date>
    </item>
    <item>
      <title>Re: Process single data set with different JSON schema rows using Pyspark in databricks</title>
      <link>https://community.databricks.com/t5/data-engineering/process-single-data-set-with-different-json-schema-rows-using/m-p/82014#M36478</link>
      <description>&lt;P&gt;Since each row has a different schema, if you plan to store these as separate columns, you cannot do it in 1 Delta table. You will need to split the messages into multiple tables depending on some information within the message. In a real scenario, different schema messages should be sent to different partitions or completely different eventhubs so it is easy to segregate. However since you have all the messages in 1 eventhub, you will need to validate each message against a schema. You can of course use spark to do this but you need to tell spark what your expected schemas are and which tables you want to send the different types of messages to.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Example code is shown below.&amp;nbsp;&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; pyspark.sql.types &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; StructType, StructField&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;#Define the schemas of the different type of messages you can receive&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;screen_schema = ArrayType(StructType([&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; StructField(&lt;/SPAN&gt;&lt;SPAN&gt;'current_screen'&lt;/SPAN&gt;&lt;SPAN&gt;, StringType(), &lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; StructField(&lt;/SPAN&gt;&lt;SPAN&gt;'sequence'&lt;/SPAN&gt;&lt;SPAN&gt;, IntegerType(), &lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; StructField(&lt;/SPAN&gt;&lt;SPAN&gt;'screen_id'&lt;/SPAN&gt;&lt;SPAN&gt;, StringType(), &lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;]))&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;cust_schema = 
ArrayType(StructType([&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; StructField(&lt;/SPAN&gt;&lt;SPAN&gt;'customer_id'&lt;/SPAN&gt;&lt;SPAN&gt;, StringType(), &lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; StructField(&lt;/SPAN&gt;&lt;SPAN&gt;'session_id'&lt;/SPAN&gt;&lt;SPAN&gt;, StringType(), &lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; StructField(&lt;/SPAN&gt;&lt;SPAN&gt;'app_ver'&lt;/SPAN&gt;&lt;SPAN&gt;, StringType(), &lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; StructField(&lt;/SPAN&gt;&lt;SPAN&gt;'screen_id'&lt;/SPAN&gt;&lt;SPAN&gt;, StringType(), &lt;/SPAN&gt;&lt;SPAN&gt;True&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;]))&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;#First set&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;df1 = df\&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; .withColumn(&lt;/SPAN&gt;&lt;SPAN&gt;"json"&lt;/SPAN&gt;&lt;SPAN&gt;, explode(from_json(col(&lt;/SPAN&gt;&lt;SPAN&gt;"value"&lt;/SPAN&gt;&lt;SPAN&gt;), screen_schema)))&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;screen_df = df1.select(df1.partition, df1.json.current_screen, df1.json.sequence, df1.json.screen_id)&lt;/SPAN&gt;&lt;SPAN&gt;.where(&lt;/SPAN&gt;&lt;SPAN&gt;df1.json.current_screen.isNotNull() &amp;amp; df1.json.sequence.isNotNull() &amp;amp; 
df1.json.screen_id.isNotNull())&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;screen_df.write.&lt;/SPAN&gt;&lt;SPAN&gt;format&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"delta"&lt;/SPAN&gt;&lt;SPAN&gt;).mode(&lt;/SPAN&gt;&lt;SPAN&gt;"append"&lt;/SPAN&gt;&lt;SPAN&gt;).saveAsTable(&lt;/SPAN&gt;&lt;SPAN&gt;"Screen_table"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;df2 = df\&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; .withColumn(&lt;/SPAN&gt;&lt;SPAN&gt;"json"&lt;/SPAN&gt;&lt;SPAN&gt;, explode(from_json(col(&lt;/SPAN&gt;&lt;SPAN&gt;"value"&lt;/SPAN&gt;&lt;SPAN&gt;), cust_schema)))&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;cust_df = df2.select(df2.partition, df2.json.customer_id, df2.json.session_id, df2.json.app_ver, df2.json.screen_id)&lt;/SPAN&gt;&lt;SPAN&gt;.where(&lt;/SPAN&gt;&lt;SPAN&gt;df2.json.customer_id.isNotNull() &amp;amp; df2.json.session_id.isNotNull() &amp;amp; df2.json.session_id.isNotNull() &amp;amp; df2.json.screen_id.isNotNull())&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;cust_df.write.&lt;/SPAN&gt;&lt;SPAN&gt;format&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"delta"&lt;/SPAN&gt;&lt;SPAN&gt;).mode(&lt;/SPAN&gt;&lt;SPAN&gt;"append"&lt;/SPAN&gt;&lt;SPAN&gt;).saveAsTable(&lt;/SPAN&gt;&lt;SPAN&gt;"Cust_table"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 06 Aug 2024 10:00:57 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/process-single-data-set-with-different-json-schema-rows-using/m-p/82014#M36478</guid>
      <dc:creator>Edthehead</dc:creator>
      <dc:date>2024-08-06T10:00:57Z</dc:date>
    </item>
    <item>
      <title>Re: Process single data set with different JSON schema rows using Pyspark in databricks</title>
      <link>https://community.databricks.com/t5/data-engineering/process-single-data-set-with-different-json-schema-rows-using/m-p/82016#M36479</link>
      <description>&lt;P&gt;Thank you, I got it.&lt;/P&gt;</description>
      <pubDate>Tue, 06 Aug 2024 10:12:09 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/process-single-data-set-with-different-json-schema-rows-using/m-p/82016#M36479</guid>
      <dc:creator>ahmed_zarar</dc:creator>
      <dc:date>2024-08-06T10:12:09Z</dc:date>
    </item>
  </channel>
</rss>

