<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Flatten a complex JSON file and load into a delta table in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/flatten-a-complex-json-file-and-load-into-a-delta-table/m-p/13829#M8430</link>
    <description>&lt;P&gt;How would I write this to a delta table to see if it works? &lt;/P&gt;</description>
    <pubDate>Wed, 13 Jul 2022 13:55:26 GMT</pubDate>
    <dc:creator>BeginnerBob</dc:creator>
    <dc:date>2022-07-13T13:55:26Z</dc:date>
    <item>
      <title>Flatten a complex JSON file and load into a delta table</title>
      <link>https://community.databricks.com/t5/data-engineering/flatten-a-complex-json-file-and-load-into-a-delta-table/m-p/13827#M8428</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I am loading a JSON file into Databricks by simply doing the following:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;from pyspark.sql.functions import *&lt;/P&gt;&lt;P&gt;from pyspark.sql.types import *&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;bronze_path="wasbs://....../140477.json"&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;df_incremental = spark.read.option("multiline","true").json(bronze_path)&lt;/P&gt;&lt;P&gt;display(df_incremental)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;My JSON file is complicated and is displayed: &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/1726iC648ECC9719D2887/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I want to be able to load this data into a delta table.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;My schema is:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;type AutoGenerated struct {
	Audit struct {
		Refno         string `json:"refno"`
		Formid        string `json:"formid"`
		AuditName     string `json:"audit_name"`
		AuditorName   string `json:"auditor_name"`
		Location      string `json:"location"`
		Fulllocation  string `json:"fulllocation"`
		Published     string `json:"published"`
		Date          string `json:"date"`
		Compliant     string `json:"compliant"`
		Archived      string `json:"archived"`
		Score         string `json:"score"`
		PossibleScore string `json:"possible_score"`
		Percentage    string `json:"percentage"`
		Answers       []struct {
			QuestionNumber   string        `json:"question_number"`
			Question         string        `json:"question"`
			Status           string        `json:"status"`
			Answerid         string        `json:"answerid"`
			Questionid       string        `json:"questionid"`
			Answer           string        `json:"answer"`
			Ansoptid         string        `json:"ansoptid,omitempty"`
			Observation      string        `json:"observation"`
			Compliant        string        `json:"compliant"`
			Score            string        `json:"score"`
			PossibleScore    string        `json:"possible_score"`
			DateResolved     string        `json:"date_resolved"`
			ResolvedByUser   string        `json:"resolved_by_user"`
			DateCompliant    string        `json:"date_compliant"`
			Compliancy       []interface{} `json:"compliancy"`
			HookName         string        `json:"hookName"`
			DateAuthorised   string        `json:"date_authorised,omitempty"`
			AuthorisedByUser string        `json:"authorised_by_user,omitempty"`
		} `json:"answers"`
	} `json:"audit"`
}&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Any idea how to do this?&lt;/P&gt;</description>
      <pubDate>Tue, 12 Jul 2022 17:27:54 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/flatten-a-complex-json-file-and-load-into-a-delta-table/m-p/13827#M8428</guid>
      <dc:creator>BeginnerBob</dc:creator>
      <dc:date>2022-07-12T17:27:54Z</dc:date>
    </item>
    <item>
      <title>Re: Flatten a complex JSON file and load into a delta table</title>
      <link>https://community.databricks.com/t5/data-engineering/flatten-a-complex-json-file-and-load-into-a-delta-table/m-p/13828#M8429</link>
      <description>&lt;P&gt;delta can handle nested columns so you can just write it to delta lake.&lt;/P&gt;&lt;P&gt;Have you tried it already?  It will probably just work.&lt;/P&gt;&lt;P&gt;If you are talking about new incoming data that you want to merge into an existing table, that will be a tad more complex.  You need to define a merge key  which will decide what operation you will do (insert, delete, update).&lt;/P&gt;</description>
      <pubDate>Wed, 13 Jul 2022 13:45:21 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/flatten-a-complex-json-file-and-load-into-a-delta-table/m-p/13828#M8429</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2022-07-13T13:45:21Z</dc:date>
    </item>
    <item>
      <title>Re: Flatten a complex JSON file and load into a delta table</title>
      <link>https://community.databricks.com/t5/data-engineering/flatten-a-complex-json-file-and-load-into-a-delta-table/m-p/13829#M8430</link>
      <description>&lt;P&gt;How would I write this to a delta table to see if it works? &lt;/P&gt;</description>
      <pubDate>Wed, 13 Jul 2022 13:55:26 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/flatten-a-complex-json-file-and-load-into-a-delta-table/m-p/13829#M8430</guid>
      <dc:creator>BeginnerBob</dc:creator>
      <dc:date>2022-07-13T13:55:26Z</dc:date>
    </item>
    <item>
      <title>Re: Flatten a complex JSON file and load into a delta table</title>
      <link>https://community.databricks.com/t5/data-engineering/flatten-a-complex-json-file-and-load-into-a-delta-table/m-p/13830#M8431</link>
      <description>&lt;P&gt;ok so you already have a df with your json data (with inferred schema).&lt;/P&gt;&lt;P&gt;Next step is:&lt;/P&gt;&lt;P&gt;df.write \&lt;/P&gt;&lt;P&gt;  .format("delta") \&lt;/P&gt;&lt;P&gt;  .mode("overwrite") \&lt;/P&gt;&lt;P&gt;  .save("&amp;lt;whereveryouwanttostoreyourdata&amp;gt;")&lt;/P&gt;</description>
      <pubDate>Wed, 13 Jul 2022 13:59:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/flatten-a-complex-json-file-and-load-into-a-delta-table/m-p/13830#M8431</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2022-07-13T13:59:05Z</dc:date>
    </item>
    <item>
      <title>Re: Flatten a complex JSON file and load into a delta table</title>
      <link>https://community.databricks.com/t5/data-engineering/flatten-a-complex-json-file-and-load-into-a-delta-table/m-p/13831#M8432</link>
      <description>&lt;P&gt;Hi @Lloyd Vickery​&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Does @Werner Stinckens​&amp;nbsp; response answer your question? If yes, would you be happy to mark it as best so that other members can find the solution more quickly?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;We'd love to hear from you.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Sun, 04 Sep 2022 05:26:33 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/flatten-a-complex-json-file-and-load-into-a-delta-table/m-p/13831#M8432</guid>
      <dc:creator>Vidula</dc:creator>
      <dc:date>2022-09-04T05:26:33Z</dc:date>
    </item>
  </channel>
</rss>

