<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: sample in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/sample/m-p/4439#M1168</link>
    <description>&lt;P&gt;@Suteja Kanuri​&amp;nbsp; can you please respond to my question above?&lt;/P&gt;</description>
    <pubDate>Mon, 22 May 2023 12:45:16 GMT</pubDate>
    <dc:creator>NathanSundarara</dc:creator>
    <dc:date>2023-05-22T12:45:16Z</dc:date>
    <item>
      <title>sample</title>
      <link>https://community.databricks.com/t5/data-engineering/sample/m-p/4436#M1165</link>
      <description>&lt;P&gt;Help parsing the JSON using Spark SQL or python. Sample json attached.&lt;/P&gt;</description>
      <pubDate>Sat, 13 May 2023 01:36:41 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/sample/m-p/4436#M1165</guid>
      <dc:creator>NathanSundarara</dc:creator>
      <dc:date>2023-05-13T01:36:41Z</dc:date>
    </item>
    <item>
      <title>Re: sample</title>
      <link>https://community.databricks.com/t5/data-engineering/sample/m-p/4437#M1166</link>
      <description>&lt;P&gt;@Nathan Sundararajan​&amp;nbsp;:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Sure! Here is an example of how to parse the JSON data using Python:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;import json
&amp;nbsp;
# Load the JSON data from a file or string
data = '{...}'  # Your JSON text here, as a string (json.loads() expects str, not a dict)
&amp;nbsp;
# Parse the JSON data into a Python object
obj = json.loads(data)
&amp;nbsp;
# Access the data using Python syntax
message_timestamp = obj["messageTimestamp"]
site_reference = obj["siteReference"]
update_count = obj["updateCount"]
updates = obj["updates"]
&amp;nbsp;
# Iterate through the updates array and access nested data
for update in updates:
    event_timestamp = update["eventTimestamp"]
    spot_reference = update["spotReference"]
    spot_info = update["spotInfo"]
    vehicle_spot = spot_info["vehicleSpot"]
    vehicle_data = spot_info["vehicleData"]
    # Access more nested data as needed...&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Alternatively, you can also use Spark SQL to parse the JSON data. Here is an example using the spark.read.json() method:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;from pyspark.sql import SparkSession
&amp;nbsp;
# Create a SparkSession object
spark = SparkSession.builder.appName("JSON Parsing").getOrCreate()
&amp;nbsp;
# Load the JSON data from a file or string
data = '{...}'  # Your JSON data here, as a JSON string
&amp;nbsp;
# Create a DataFrame from the JSON data
df = spark.read.json(spark.sparkContext.parallelize([data]))
&amp;nbsp;
# Register the DataFrame as a temporary view
df.createOrReplaceTempView("json_data")
&amp;nbsp;
# Query the data using Spark SQL syntax
result = spark.sql("""
    SELECT messageTimestamp, siteReference, updateCount, upd.eventTimestamp, upd.spotReference,
           upd.spotInfo.vehicleSpot.externalReference, upd.spotInfo.vehicleSpot.state,
           upd.spotInfo.vehicleData.externalReference, upd.spotInfo.vehicleData.state
    FROM json_data
    LATERAL VIEW EXPLODE(updates) exploded AS upd
""")
&amp;nbsp;
# Show the result
result.show()&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;This query selects some of the fields from the JSON data and explodes the updates array into separate rows. You can modify the query as needed to access more or less data.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 13 May 2023 17:11:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/sample/m-p/4437#M1166</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-05-13T17:11:58Z</dc:date>
    </item>
    <item>
      <title>Re: sample</title>
      <link>https://community.databricks.com/t5/data-engineering/sample/m-p/4438#M1167</link>
      <description>&lt;P&gt;Can you explain how to go all the way to one of the array.. c3fieldstrings inside the field structure please?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;"fields": {&lt;/P&gt;&lt;P&gt;						"c3FieldStrings": [&lt;/P&gt;&lt;P&gt;							{&lt;/P&gt;&lt;P&gt;								"externalReference": "4095",&lt;/P&gt;&lt;P&gt;								"name": "Blocking Spot"&lt;/P&gt;&lt;P&gt;							},&lt;/P&gt;&lt;P&gt;							{&lt;/P&gt;&lt;P&gt;								"externalReference": "Disabled",&lt;/P&gt;&lt;P&gt;								"name": "Disabled",&lt;/P&gt;&lt;P&gt;								"value": "false"&lt;/P&gt;&lt;P&gt;							},&lt;/P&gt;&lt;P&gt;							{&lt;/P&gt;&lt;P&gt;								"externalReference": "WorkflowVehicleSpot",&lt;/P&gt;&lt;P&gt;								"name": "Spot",&lt;/P&gt;&lt;P&gt;								"value": "AA18_FL5"&lt;/P&gt;&lt;P&gt;							},&lt;/P&gt;&lt;P&gt;							{&lt;/P&gt;&lt;P&gt;								"externalReference": "WorkflowVMT",&lt;/P&gt;&lt;P&gt;								"name": "WorkflowVMT"&lt;/P&gt;&lt;P&gt;							},&lt;/P&gt;</description>
      <pubDate>Sat, 13 May 2023 20:51:54 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/sample/m-p/4438#M1167</guid>
      <dc:creator>NathanSundarara</dc:creator>
      <dc:date>2023-05-13T20:51:54Z</dc:date>
    </item>
    <item>
      <title>Re: sample</title>
      <link>https://community.databricks.com/t5/data-engineering/sample/m-p/4439#M1168</link>
      <description>&lt;P&gt;@Suteja Kanuri​&amp;nbsp; can you please respond to my question above?&lt;/P&gt;</description>
      <pubDate>Mon, 22 May 2023 12:45:16 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/sample/m-p/4439#M1168</guid>
      <dc:creator>NathanSundarara</dc:creator>
      <dc:date>2023-05-22T12:45:16Z</dc:date>
    </item>
    <item>
      <title>Re: sample</title>
      <link>https://community.databricks.com/t5/data-engineering/sample/m-p/4440#M1169</link>
      <description>&lt;P&gt;@Nathan Sundararajan​&amp;nbsp;: Does the below help?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;data = {
    "fields": {
        "c3FieldStrings": [
            {
                "externalReference": "4095",
                "name": "Blocking Spot"
            },
            {
                "externalReference": "Disabled",
                "name": "Disabled",
                "value": "false"
            },
            {
                "externalReference": "WorkflowVehicleSpot",
                "name": "Spot",
                "value": "AA18_FL5"
            },
            {
                "externalReference": "WorkflowVMT",
                "name": "WorkflowVMT"
            }
        ]
    }
}
&amp;nbsp;
c3FieldStrings = data["fields"]["c3FieldStrings"]&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 09 Jun 2023 09:40:22 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/sample/m-p/4440#M1169</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-06-09T09:40:22Z</dc:date>
    </item>
  </channel>
</rss>

