<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How to convert records in Azure Databricks delta table to a nested JSON structure? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8759#M4300</link>
    <description>&lt;P&gt;I had tried this, but it creates in the delta format, viz., the json output file contains one row for each record in the table. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="2023-02-24 22_08_34-MyTest - Databricks 2"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/595i6386183DB0BFB2D8/image-size/large?v=v2&amp;amp;px=999" role="button" title="2023-02-24 22_08_34-MyTest - Databricks 2" alt="2023-02-24 22_08_34-MyTest - Databricks 2" /&gt;&lt;/span&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;And the same thing happens if I use to_json as shown below. Since the examples in the databricks docs, I'm unable to construct a proper query:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="2023-02-24 22_08_34-MyTest - Databricks 3"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/602i1865E23C10282BBE/image-size/large?v=v2&amp;amp;px=999" role="button" title="2023-02-24 22_08_34-MyTest - Databricks 3" alt="2023-02-24 22_08_34-MyTest - Databricks 3" /&gt;&lt;/span&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Lastly, the intention of requiring json output as a file is for file-based integration with other systems.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Hope that clarifies!&lt;/P&gt;</description>
    <pubDate>Fri, 24 Feb 2023 18:35:36 GMT</pubDate>
    <dc:creator>sujai_sparks</dc:creator>
    <dc:date>2023-02-24T18:35:36Z</dc:date>
    <item>
      <title>How to convert records in Azure Databricks delta table to a nested JSON structure?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8756#M4297</link>
      <description>&lt;P&gt;Let's say I have a delta table in Azure databricks that stores the staff details (denormalized).&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="2023-02-24 22_08_34-MyTest - Databricks"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/609i4421517F11EF5CE0/image-size/large?v=v2&amp;amp;px=999" role="button" title="2023-02-24 22_08_34-MyTest - Databricks" alt="2023-02-24 22_08_34-MyTest - Databricks" /&gt;&lt;/span&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I wanted to export the data in the JSON format and save it as a single file on a storage location. I need help with the databricks sql query to group/construct the data in the JSON format.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Here is the sample code and desired output:&lt;/P&gt;&lt;P&gt;Delta Table schema:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;%sql
&amp;nbsp;
create table if not exists staff_details (
&amp;nbsp;
department_id int comment 'id of the department',
&amp;nbsp;
department_name string comment 'name of the department',
&amp;nbsp;
employee_id int comment 'employee id of the staff',
&amp;nbsp;
first_name string comment 'first name of the staff',
&amp;nbsp;
last_name string comment 'last name of the staff'
&amp;nbsp;
)
&amp;nbsp;
using delta&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Script to populate the delta table:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;%sql
&amp;nbsp;
insert into staff_details(department_id, department_name, employee_id, first_name, last_name)
values(1,'Dept-A',101,'Guru','Datt'), (1,'Dept-A',102,'Tom','Cruise'), (2,'Dept-B',201,'Angelina','Jolie')&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Show records:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;%sql
select * from staff_details order by department_id, employee_id&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Desired output:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;{
    "staff_details":[
        {
            "department_id":1,
            "department_name": "Dept-A",
            "staff_members": [
                {
                    "employee_id":101,
                    "first_name":"Guru",
                    "last_name":"Datt"
                },
                {
                    "employee_id":102,
                    "first_name":"Tom",
                    "last_name":"Cruise"
                }                
                
            ]
        },
        {
            "department_id":2,
            "department_name": "Dept-B",
            "staff_members": [
                {
                    "employee_id":201,
                    "first_name":"Angelina",
                    "last_name":"Jolie"
                }             
                
            ]
        }
    ]
}&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;I tried using the to_json() function, and also using manual string concatenation with group by etc, but none of that is working well.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Please help.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 24 Feb 2023 16:42:54 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8756#M4297</guid>
      <dc:creator>sujai_sparks</dc:creator>
      <dc:date>2023-02-24T16:42:54Z</dc:date>
    </item>
    <item>
      <title>Re: How to convert records in Azure Databricks delta table to a nested JSON structure?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8757#M4298</link>
      <description>&lt;P&gt;Why do you want to convert the records to JSON?  Does it have to be SQL? &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;You could read the table with the DataFrame APIs and write a JSON file out. Please note that "coalesce" is only there so that it produces a single file. It would be something like this: &lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;df = spark.read.table("YOUR TABLE") 
&amp;nbsp;
df.coalesce(1).write.format('json').save("/path/to/file")&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Also, can you show your example with the to_json() function?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 24 Feb 2023 18:03:52 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8757#M4298</guid>
      <dc:creator>Ryan_Chynoweth</dc:creator>
      <dc:date>2023-02-24T18:03:52Z</dc:date>
    </item>
    <item>
      <title>Re: How to convert records in Azure Databricks delta table to a nested JSON structure?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8758#M4299</link>
      <description>&lt;P&gt;repartition(1) is slightly better than coalesce for getting down to one file.  &lt;/P&gt;</description>
      <pubDate>Fri, 24 Feb 2023 18:32:20 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8758#M4299</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-02-24T18:32:20Z</dc:date>
    </item>
    <item>
      <title>Re: How to convert records in Azure Databricks delta table to a nested JSON structure?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8759#M4300</link>
      <description>&lt;P&gt;I had tried this, but it creates in the delta format, viz., the json output file contains one row for each record in the table. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="2023-02-24 22_08_34-MyTest - Databricks 2"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/595i6386183DB0BFB2D8/image-size/large?v=v2&amp;amp;px=999" role="button" title="2023-02-24 22_08_34-MyTest - Databricks 2" alt="2023-02-24 22_08_34-MyTest - Databricks 2" /&gt;&lt;/span&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;And the same thing happens if I use to_json as shown below. Since the examples in the databricks docs, I'm unable to construct a proper query:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="2023-02-24 22_08_34-MyTest - Databricks 3"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/602i1865E23C10282BBE/image-size/large?v=v2&amp;amp;px=999" role="button" title="2023-02-24 22_08_34-MyTest - Databricks 3" alt="2023-02-24 22_08_34-MyTest - Databricks 3" /&gt;&lt;/span&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Lastly, the intention of requiring json output as a file is for file-based integration with other systems.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Hope that clarifies!&lt;/P&gt;</description>
      <pubDate>Fri, 24 Feb 2023 18:35:36 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8759#M4300</guid>
      <dc:creator>sujai_sparks</dc:creator>
      <dc:date>2023-02-24T18:35:36Z</dc:date>
    </item>
    <item>
      <title>Re: How to convert records in Azure Databricks delta table to a nested JSON structure?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8760#M4301</link>
      <description>&lt;P&gt;That is actually not a delta format. Spark writes data this way. The file you have highlighted is a JSON file. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Why do you need to have a json file?&lt;/P&gt;</description>
      <pubDate>Fri, 24 Feb 2023 18:38:24 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8760#M4301</guid>
      <dc:creator>Ryan_Chynoweth</dc:creator>
      <dc:date>2023-02-24T18:38:24Z</dc:date>
    </item>
    <item>
      <title>Re: How to convert records in Azure Databricks delta table to a nested JSON structure?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8761#M4302</link>
      <description>&lt;P&gt;Good call. Thanks @Joseph Kambourakis​&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 24 Feb 2023 18:38:39 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8761#M4302</guid>
      <dc:creator>Ryan_Chynoweth</dc:creator>
      <dc:date>2023-02-24T18:38:39Z</dc:date>
    </item>
    <item>
      <title>Re: How to convert records in Azure Databricks delta table to a nested JSON structure?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8762#M4303</link>
      <description>&lt;P&gt;On Microsoft SQL Server, the following TSQL query would produce the desired output. But, I'm unable to replicate the same in Databricks SQL &lt;span class="lia-unicode-emoji" title=":disappointed_face:"&gt;😞&lt;/span&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;SELECT DISTINCT department_id, department_name 
	,(
		SELECT employee_id
			,first_name
			,last_name
		FROM staff_details sdi
		WHERE sdi.department_id = sdo.department_id
		FOR JSON PATH
		) AS staff_members
FROM staff_details sdo
ORDER BY sdo.department_id
FOR JSON PATH, ROOT ('staff_details');&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 24 Feb 2023 18:55:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8762#M4303</guid>
      <dc:creator>sujai_sparks</dc:creator>
      <dc:date>2023-02-24T18:55:48Z</dc:date>
    </item>
    <item>
      <title>Re: How to convert records in Azure Databricks delta table to a nested JSON structure?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8763#M4304</link>
      <description>&lt;P&gt;Delta is the default format, but if you put using JSON it will write a json file.  The success and committed files are just the way that spark ensures you don't get partial writes.  &lt;/P&gt;</description>
      <pubDate>Fri, 24 Feb 2023 18:58:50 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8763#M4304</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-02-24T18:58:50Z</dc:date>
    </item>
    <item>
      <title>Re: How to convert records in Azure Databricks delta table to a nested JSON structure?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8764#M4305</link>
      <description>&lt;P&gt;I see. Here is a good example from another community &lt;A href="https://community.databricks.com/s/question/0D53f00001tCbWfCAK/how-to-merge-all-the-columns-into-one-column-as-json" alt="https://community.databricks.com/s/question/0D53f00001tCbWfCAK/how-to-merge-all-the-columns-into-one-column-as-json" target="_blank"&gt;post&lt;/A&gt;. &lt;/P&gt;</description>
      <pubDate>Fri, 24 Feb 2023 19:48:28 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8764#M4305</guid>
      <dc:creator>Ryan_Chynoweth</dc:creator>
      <dc:date>2023-02-24T19:48:28Z</dc:date>
    </item>
    <item>
      <title>Re: How to convert records in Azure Databricks delta table to a nested JSON structure?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8765#M4306</link>
      <description>&lt;P&gt;If you want to do this in SQL, here you go:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;%sql
select department_id, department_name, collect_list(struct(employee_id, first_name, last_name)) as staff_members
from staff_details
group by department_id, department_name&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;Below is how to do the same in Pyspark:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;from pyspark.sql.functions import *
&amp;nbsp;
df = spark.read.table("staff_details")
&amp;nbsp;
df1 = df.groupby("department_id", "department_name").agg(
    collect_list(struct(col("employee_id"), col("first_name"), col("last_name"))).alias("staff_members")
)&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Both give you an identical dataframe structure to write out to JSON however you would like to:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/612i47BEE6B85A4BBF2A/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/606i2D24C20996D0C9FD/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 24 Feb 2023 22:02:38 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8765#M4306</guid>
      <dc:creator>NateAnth</dc:creator>
      <dc:date>2023-02-24T22:02:38Z</dc:date>
    </item>
    <item>
      <title>Re: How to convert records in Azure Databricks delta table to a nested JSON structure?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8766#M4307</link>
      <description>&lt;P&gt;@Nathan Anthony​&amp;nbsp;, Thank you, Thank you so much! &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thank you @Ryan Chynoweth​&amp;nbsp;and @Joseph Kambourakis​&amp;nbsp;as well! All of you guys are great helping the community. There is a lot to learn from the community!&lt;/P&gt;</description>
      <pubDate>Sat, 25 Feb 2023 01:24:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8766#M4307</guid>
      <dc:creator>sujai_sparks</dc:creator>
      <dc:date>2023-02-25T01:24:48Z</dc:date>
    </item>
    <item>
      <title>Re: How to convert records in Azure Databricks delta table to a nested JSON structure?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8767#M4308</link>
      <description>&lt;P&gt;@Ryan Chynoweth​&amp;nbsp;, regarding your question "Why do you need to have a json file?", the intention of requiring JSON output as a file is for file-based integration with other systems downstream.&lt;/P&gt;</description>
      <pubDate>Sat, 25 Feb 2023 01:27:11 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8767#M4308</guid>
      <dc:creator>sujai_sparks</dc:creator>
      <dc:date>2023-02-25T01:27:11Z</dc:date>
    </item>
    <item>
      <title>Re: How to convert records in Azure Databricks delta table to a nested JSON structure?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8768#M4309</link>
      <description>&lt;P&gt;How do I mark this question as answered?&lt;/P&gt;</description>
      <pubDate>Sat, 25 Feb 2023 01:31:23 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8768#M4309</guid>
      <dc:creator>sujai_sparks</dc:creator>
      <dc:date>2023-02-25T01:31:23Z</dc:date>
    </item>
    <item>
      <title>Re: How to convert records in Azure Databricks delta table to a nested JSON structure?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8769#M4310</link>
      <description>&lt;P&gt;Final sql:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;%sql
select
  collect_list(
    struct(department_id, department_name, staff_members)
  )
from
  (
    select
      department_id,
      department_name,
      collect_list(struct(employee_id, first_name, last_name)) as staff_members
    from
      staff_details
    group by
      department_id,
      department_name
  ) vt&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;And the output:&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="2023-02-24 22_08_34-MyTest - Databricks 4"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/611i2B5CB52C22BC7430/image-size/large?v=v2&amp;amp;px=999" role="button" title="2023-02-24 22_08_34-MyTest - Databricks 4" alt="2023-02-24 22_08_34-MyTest - Databricks 4" /&gt;&lt;/span&gt;Tons of thanks to everyone!&lt;/P&gt;</description>
      <pubDate>Sat, 25 Feb 2023 01:36:32 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8769#M4310</guid>
      <dc:creator>sujai_sparks</dc:creator>
      <dc:date>2023-02-25T01:36:32Z</dc:date>
    </item>
    <item>
      <title>Re: How to convert records in Azure Databricks delta table to a nested JSON structure?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8770#M4311</link>
      <description>&lt;P&gt;Glad it worked for you!!&lt;/P&gt;</description>
      <pubDate>Sat, 25 Feb 2023 02:14:40 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-convert-records-in-azure-databricks-delta-table-to-a/m-p/8770#M4311</guid>
      <dc:creator>NateAnth</dc:creator>
      <dc:date>2023-02-25T02:14:40Z</dc:date>
    </item>
  </channel>
</rss>

