<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>All Data Engineering posts</title>
    <link>https://community.databricks.com/t5/data-engineering/bd-p/data-engineering</link>
    <description>All Data Engineering posts</description>
    <pubDate>Sun, 28 Jun 2026 02:27:47 GMT</pubDate>
    <dc:creator>data-engineering</dc:creator>
    <dc:date>2026-06-28T02:27:47Z</dc:date>
    <item>
      <title>Re: DAB best practices suggestion</title>
      <link>https://community.databricks.com/t5/data-engineering/dab-best-practices-suggestion/m-p/160756#M54946</link>
      <description>&lt;P&gt;You can create Databricks Asset Bundles that are decoupled by domain, managed via multi target declarations within configuration and also driven by immutable, versioned artifacts stored securely within Unity Catalog Volumes. You can rely on explicit CI/CD gating and dynamic, scoped resource names rather than monolithic &amp;amp; hardcoded infrastructure definitions.&lt;/P&gt;&lt;H3&gt;&lt;U&gt;Bundle Structure &amp;amp; Domain Isolation&lt;/U&gt;&lt;/H3&gt;&lt;UL&gt;&lt;LI&gt;&lt;P&gt;Decoupled Domain Bundles -&amp;nbsp;You can group configurations into small focused bundles aligned to specific data products or business domains instead of monolithic setup.&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;Shared Lifecycles -&amp;nbsp;Ensure that a single bundle contains only the resources (jobs, pipelines, dashboards) that share a unified deployment lifecycle and ownership domain boundary.&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;Target Definitions -&amp;nbsp;You can maintain all target definitions (dev, uat, prod) within a single yml per bundle to guarantee environmental structural parity.&amp;nbsp;More details &lt;A href="https://docs.databricks.com/aws/en/developers/best-practices/" target="_blank"&gt;here&lt;/A&gt;&lt;/P&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;H3&gt;&lt;U&gt;Multi-Target Environment Strategy&lt;/U&gt;&lt;/H3&gt;&lt;UL&gt;&lt;LI&gt;&lt;P&gt;Development -&amp;nbsp;Configure for feature-branch agility. Implement dynamic resource renaming using built-in metadata expressions (such as ${workspace.current_user.short_name}) to enforce isolation within shared or personal workspaces. Route all computation to development catalogs and schemas.&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;Staging/User Acceptance Testing -&amp;nbsp;Trigger automated deployments on pull request merges to the main branch. 
This layer must run full integration suites and validation workflows against pre-production catalogs, mirroring production configurations identically.&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;Production -&amp;nbsp;Guard production workloads with manual approval workflows and strict role-based access control (RBAC) with the target production Unity Catalog environments.&lt;/P&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;H3&gt;&lt;U&gt;CI/CD Orchestration (Azure DevOps)&lt;/U&gt;&lt;/H3&gt;&lt;UL&gt;&lt;LI&gt;&lt;P&gt;Pull Request Verification -&amp;nbsp;Enforce static analysis by running databricks bundle validate prior to any code merges to catch syntactical and structural anomalies early.&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;Continuous Deployment (UAT) -&amp;nbsp;Compile code, version artifacts, stage them directly into Unity Catalog volumes and execute target-specific deployments sequentially.&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;Release Management (Prod) -&amp;nbsp;Restrict production deployments to manual approval gates within Azure DevOps Environments. Re-use the identical, immutable artifacts verified in UAT to eliminate drift.&lt;/P&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;H3&gt;&lt;U&gt;Artifact &amp;amp; Dependency Management&lt;/U&gt;&lt;/H3&gt;&lt;UL&gt;&lt;LI&gt;&lt;P&gt;Unity Catalog Volumes -&amp;nbsp;Store external dependencies (Python Wheels, JARs) inside secure, governed Unity Catalog Volumes rather than embedding large binaries directly into the bundle workspace.&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;Inter-Bundle Governance -&amp;nbsp;Model complex cross-bundle dependencies explicitly within Azure DevOps YAML pipeline tasks rather than nesting configuration files. Fail pipeline execution immediately if upstream assets are absent.&lt;/P&gt;&lt;/LI&gt;&lt;/UL&gt;</description>
      <pubDate>Sat, 27 Jun 2026 17:07:22 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/dab-best-practices-suggestion/m-p/160756#M54946</guid>
      <dc:creator>balajij8</dc:creator>
      <dc:date>2026-06-27T17:07:22Z</dc:date>
    </item>
    <item>
      <title>DAB best practices suggestion</title>
      <link>https://community.databricks.com/t5/data-engineering/dab-best-practices-suggestion/m-p/160754#M54945</link>
      <description>&lt;P class=""&gt;We're currently setting up &lt;STRONG&gt;Databricks Asset Bundles (DAB)&lt;/STRONG&gt; with a &lt;STRONG&gt;CI/CD pipeline using Azure DevOps&lt;/STRONG&gt;.&lt;/P&gt;&lt;P&gt;Our planned development workflow is as follows:&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Main branch → Developer creates a feature branch → Implement changes → Create a Pull Request → Senior developers review and approve → Merge into the main branch → Deploy to UAT → After UAT sign-off, deploy to Production.&lt;BR /&gt;&lt;BR /&gt;&lt;/STRONG&gt;I would like to hear suggestions, especially the current best practices.&lt;/P&gt;</description>
      <pubDate>Sat, 27 Jun 2026 16:13:18 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/dab-best-practices-suggestion/m-p/160754#M54945</guid>
      <dc:creator>DazzaiDe</dc:creator>
      <dc:date>2026-06-27T16:13:18Z</dc:date>
    </item>
    <item>
      <title>Re: Bundle deployment overwrites artifacts while jobs are running - best practices?</title>
      <link>https://community.databricks.com/t5/data-engineering/bundle-deployment-overwrites-artifacts-while-jobs-are-running/m-p/160749#M54944</link>
      <description>&lt;P&gt;Yes, I understand that part. If you have&amp;nbsp;&lt;SPAN&gt;source_linked_deployment set to false, both developers will be deploying to the same location under the /.bundle directory. Then the overwrite can happen.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;If&amp;nbsp;source_linked_deployment is set to True or not set (by default it is True), then the workflow will point to the source, that is, the respective developer's directory.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 27 Jun 2026 14:54:51 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/bundle-deployment-overwrites-artifacts-while-jobs-are-running/m-p/160749#M54944</guid>
      <dc:creator>sudhaktr</dc:creator>
      <dc:date>2026-06-27T14:54:51Z</dc:date>
    </item>
    <item>
      <title>Re: PySpark AnalysisException: Ambiguous reference to field t when parsing nested JSON</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160748#M54943</link>
      <description>&lt;P&gt;Hello Balajij8,&lt;/P&gt;&lt;P&gt;I just wanted to let you know that the issue I posted regarding Spark not writing Parquet files was actually due to my own mistake.&lt;/P&gt;&lt;P&gt;I had mounted the data volume only in my Spark job (driver/scheduler) container instead of the Spark worker container. Since the worker executes the tasks and writes the output, the Parquet files were being stored inside the worker's filesystem. I was checking the job container, which only had the checkpoint and metadata directories because those were the only volumes I had mounted in my Docker Compose configuration.&lt;/P&gt;&lt;P&gt;Thank you for your time and for helping me investigate the issue. I really appreciate your guidance.&lt;/P&gt;</description>
      <pubDate>Sat, 27 Jun 2026 14:51:39 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160748#M54943</guid>
      <dc:creator>VikasM</dc:creator>
      <dc:date>2026-06-27T14:51:39Z</dc:date>
    </item>
    <item>
      <title>Re: Bundle deployment overwrites artifacts while jobs are running - best practices?</title>
      <link>https://community.databricks.com/t5/data-engineering/bundle-deployment-overwrites-artifacts-while-jobs-are-running/m-p/160747#M54942</link>
      <description>&lt;P&gt;No, that's not what I am talking about. It's like this: as shown in the picture, we can generate a build artifact and use it in the job.&lt;BR /&gt;So if a developer runs the deploy and is running their job, and at the same time a 2nd deploy happens, it overwrites the build. So now the first job is looking for a whl which has been overwritten.&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="animeshjain_1-1782570494117.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/28339i7F95207D3AD116B0/image-size/medium?v=v2&amp;amp;px=400" role="button" title="animeshjain_1-1782570494117.png" alt="animeshjain_1-1782570494117.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="animeshjain_0-1782570427869.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/28338iFE2948A8E7F4306C/image-size/medium?v=v2&amp;amp;px=400" role="button" title="animeshjain_0-1782570427869.png" alt="animeshjain_0-1782570427869.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 27 Jun 2026 14:30:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/bundle-deployment-overwrites-artifacts-while-jobs-are-running/m-p/160747#M54942</guid>
      <dc:creator>animeshjain</dc:creator>
      <dc:date>2026-06-27T14:30:48Z</dc:date>
    </item>
    <item>
      <title>Re: Bundle deployment overwrites artifacts while jobs are running - best practices?</title>
      <link>https://community.databricks.com/t5/data-engineering/bundle-deployment-overwrites-artifacts-while-jobs-are-running/m-p/160744#M54941</link>
      <description>&lt;P&gt;&lt;SPAN&gt;Do you have source_linked_deployment set as false? That's probably causing it.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 27 Jun 2026 14:09:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/bundle-deployment-overwrites-artifacts-while-jobs-are-running/m-p/160744#M54941</guid>
      <dc:creator>sudhaktr</dc:creator>
      <dc:date>2026-06-27T14:09:58Z</dc:date>
    </item>
    <item>
      <title>Bundle deployment overwrites artifacts while jobs are running - best practices?</title>
      <link>https://community.databricks.com/t5/data-engineering/bundle-deployment-overwrites-artifacts-while-jobs-are-running/m-p/160733#M54940</link>
      <description>&lt;P&gt;Hi everyone,&lt;/P&gt;&lt;P&gt;I'm using #Declarative Automation Bundles (DAB) to deploy data pipelines, and I've run into an issue with concurrent job runs and deployment&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;What happened:&lt;/STRONG&gt;&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;I started a job that depends on a wheel file built by the bundle (timestamped artifact in&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN class=""&gt;.bundle/.../artifacts/.internal/&lt;/SPAN&gt;)&lt;/LI&gt;&lt;LI&gt;While the job was running, I ran&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN class=""&gt;databricks bundle deploy&lt;/SPAN&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;again&lt;/LI&gt;&lt;LI&gt;The deployment generated a new timestamped wheel file and removed the old one&lt;/LI&gt;&lt;LI&gt;My running job failed with&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN class=""&gt;ERROR_NO_SUCH_FILE_OR_DIRECTORY&lt;/SPAN&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;because it couldn't find the original artifact&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;&lt;STRONG&gt;My concern:&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;This seems like it could be a problem in team environments. If two developers are working on the same bundle target:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;Developer A starts a job from their deployment&lt;/LI&gt;&lt;LI&gt;Developer B deploys their changes to the same target&lt;/LI&gt;&lt;LI&gt;Developer A's running job fails due to missing artifacts&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;&lt;STRONG&gt;My questions:&lt;/STRONG&gt;&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;Is this expected behavior, or am I misusing bundles?&lt;/LI&gt;&lt;LI&gt;What are the recommended patterns to prevent this in multi-developer teams?&lt;/LI&gt;&lt;LI&gt;Should each developer use personal bundle targets (&lt;SPAN class=""&gt;dev_alice&lt;/SPAN&gt;,&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN class=""&gt;dev_bob&lt;/SPAN&gt;), or is there a better approach?&lt;/LI&gt;&lt;LI&gt;Does this same issue apply to production deployments? 
If so, how should we handle long-running jobs during deployment?&lt;/LI&gt;&lt;LI&gt;How should production CI/CD deployments be coordinated when scheduled or long-running jobs might be active?&amp;nbsp;Should we check for active runs before deploying? Is there a built-in mechanism or recommended pattern to prevent breaking currently executing production jobs?&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;Any guidance on best practices for coordinating bundle deployments with active job runs would be appreciated!&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="animeshjain_0-1782560608354.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/28337i000A5C6981800F27/image-size/medium?v=v2&amp;amp;px=400" role="button" title="animeshjain_0-1782560608354.png" alt="animeshjain_0-1782560608354.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 27 Jun 2026 11:51:17 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/bundle-deployment-overwrites-artifacts-while-jobs-are-running/m-p/160733#M54940</guid>
      <dc:creator>animeshjain</dc:creator>
      <dc:date>2026-06-27T11:51:17Z</dc:date>
    </item>
    <item>
      <title>Lakeflow connect Native connectors (tik, meta ads, Google Ads) - one table per account</title>
      <link>https://community.databricks.com/t5/data-engineering/lakeflow-connect-native-connectors-tik-meta-ads-google-ads-one/m-p/160704#M54939</link>
      <description>&lt;P&gt;We want to leverage these connectors to pull in marketing spend data. But the docs seem to say that the destination must be unique based on accounts. For Tik, we have a hundred accounts... each account will have a destination table for each object.&amp;nbsp; So like this...&amp;nbsp;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&amp;nbsp;ads_account1, campaign_account1&lt;/LI&gt;&lt;LI&gt;&amp;nbsp;ads_account2, campaign_account2&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;Total tables is accounts * number of objects. 100 accounts and assuming 6 tables means 600 tables!&amp;nbsp;&lt;/P&gt;&lt;P&gt;Is there a better solution? Would be great to ingest into only one table... all ads for an accounts feed into one Ads table.&lt;/P&gt;&lt;P&gt;&lt;EM&gt;Managed ingestion connectors don't support duplicate destination table names in the same schema&lt;/EM&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;also, forum says:&lt;/P&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV&gt;&lt;P class=""&gt;&amp;gt; The message subject contains t*i*k*t*o*k, which is not permitted in this community. Please remove this content before sending your post.&lt;/P&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class=""&gt;makes it hard to make a post about its connector!&lt;/DIV&gt;</description>
      <pubDate>Fri, 26 Jun 2026 20:55:21 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/lakeflow-connect-native-connectors-tik-meta-ads-google-ads-one/m-p/160704#M54939</guid>
      <dc:creator>GabeMatch</dc:creator>
      <dc:date>2026-06-26T20:55:21Z</dc:date>
    </item>
    <item>
      <title>Streaming Amazon DocumentDB to Databricks in near real time - what's the best approach?</title>
      <link>https://community.databricks.com/t5/data-engineering/streaming-amazon-documentdb-to-databricks-in-near-real-time-what/m-p/160692#M54938</link>
      <description>&lt;P&gt;Hi everyone,&lt;/P&gt;&lt;P&gt;I'm looking for advice from anyone who has implemented near real-time ingestion from&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;Amazon DocumentDB&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;into&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;Databricks&lt;/STRONG&gt;.&lt;/P&gt;&lt;P&gt;Our current architecture is:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;P&gt;Application → Amazon DocumentDB&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;Python AWS Lambda functions capture changes from DocumentDB&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;Lambda continuously writes the data into Amazon Redshift&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;Redshift is then used as our data warehouse&lt;/P&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;This setup has been working well for us.&lt;/P&gt;&lt;P&gt;We're now evaluating&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;Databricks&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;as our analytics platform, but I'm not finding a straightforward way to stream data directly from DocumentDB into Databricks. I've heard that Databricks doesn't have a native connector or CDC support for Amazon DocumentDB.&lt;/P&gt;&lt;P&gt;My questions are:&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;&lt;P&gt;Has anyone successfully implemented near real-time or real-time ingestion from Amazon DocumentDB into Databricks?&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;What architecture are you using?&lt;/P&gt;&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;I'm interested in production-proven architectures rather than proof-of-concept examples.&lt;/P&gt;&lt;P&gt;Thanks in advance!&lt;/P&gt;</description>
      <pubDate>Fri, 26 Jun 2026 15:44:56 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/streaming-amazon-documentdb-to-databricks-in-near-real-time-what/m-p/160692#M54938</guid>
      <dc:creator>AustinBen</dc:creator>
      <dc:date>2026-06-26T15:44:56Z</dc:date>
    </item>
    <item>
      <title>Re: PySpark AnalysisException: Ambiguous reference to field t when parsing nested JSON</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160689#M54937</link>
      <description>&lt;P&gt;Thanks for your reply.&lt;/P&gt;&lt;P&gt;I investigated the output directories a bit further before trying another path. If my understanding is correct, the volume mount and read/write permissions do not seem to be the issue in my case. The reason I think this is that both the Docker container and my local machine continuously create and update the checkpoints and data directories. The checkpoint files, offsets, commits, and _spark_metadata are all being written successfully,&lt;BR /&gt;which suggests that Spark can write to the mounted volume.&lt;/P&gt;&lt;P&gt;What confuses me is that _spark_metadata contains entries such as:&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;{"path":"file:///opt/spark/app/data/whale_alerts/part-00000-ac552411-0fa6-47c8-b120-4dfcc9227b09-c000.snappy.parquet","size":1125,"isDir":false,"modificationTime":1782477948968,"blockReplication":1,"blockSize":33554432,"action":"add"}&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;which indicates that Spark believes a Parquet file was committed. However, when I search both inside the container and on the host, the referenced part-*.snappy.parquet files do not exist—only the _spark_metadata directory is present. Could this indicate an issue during the file commit phase rather than a volume mount or permission problem? If so, are there any Spark or Hadoop configurations that you would recommend checking next?&lt;/P&gt;</description>
      <pubDate>Fri, 26 Jun 2026 15:18:03 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160689#M54937</guid>
      <dc:creator>VikasM</dc:creator>
      <dc:date>2026-06-26T15:18:03Z</dc:date>
    </item>
    <item>
      <title>Re: PySpark AnalysisException: Ambiguous reference to field t when parsing nested JSON</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160675#M54936</link>
      <description>&lt;P&gt;Spark Structured Streaming writes to file sinks and generally it uses a phased commit by&amp;nbsp;writing temporary files&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;to the output directory followed by w&lt;/SPAN&gt;riting metadata&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;with references and a final&amp;nbsp;&lt;/SPAN&gt;commit&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;by moving/renaming temp files to final names. &lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;You can verify the Docker side volume mount misconfigurations as some&lt;SPAN&gt;&amp;nbsp;docker configurations use temporary filesystems that get cleaned up or a background process removes the files. The files are written but immediately deleted.&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;&lt;SPAN&gt;You can also verify that&amp;nbsp;&lt;SPAN class=""&gt;/opt/spark/app/data&lt;/SPAN&gt;&amp;nbsp;is actually mounted to the host &amp;amp; ensure that the permissions&amp;nbsp; of&amp;nbsp;&lt;SPAN class=""&gt;_spark_metadata&lt;/SPAN&gt;&amp;nbsp;directories and the other directories remain the same - read/write for Spark to perform all operations seamlessly. &lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;&lt;SPAN&gt;You can change the code to write data to a path that has read/write access for Spark to perform all operations &amp;amp; validate &amp;amp; confirm.&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 26 Jun 2026 13:37:55 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160675#M54936</guid>
      <dc:creator>balajij8</dc:creator>
      <dc:date>2026-06-26T13:37:55Z</dc:date>
    </item>
    <item>
      <title>Re: PySpark AnalysisException: Ambiguous reference to field t when parsing nested JSON</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160671#M54935</link>
      <description>&lt;P&gt;Hello balajij8,&lt;/P&gt;&lt;P&gt;Before trying your suggestions, I decided to inspect the filesystem inside my Spark container once more.&lt;/P&gt;&lt;P&gt;I found something that has changed my understanding of the problem. There are no errors being reported by the streaming job, and the checkpoint and _spark_metadata directories are being updated continuously. I also found metadata entries that indicate Spark believes it has successfully written Parquet files.&lt;/P&gt;&lt;P&gt;However, I cannot find the actual part-*.snappy.parquet files in the output directory, even though the metadata references them. For example:&lt;/P&gt;&lt;P&gt;$ cd _spark_metadata&lt;BR /&gt;$ ls&lt;BR /&gt;0 1 2 3&lt;BR /&gt;$ cat 1&lt;BR /&gt;v1&lt;BR /&gt;{"path":"file:///opt/spark/app/data/whale_alerts/part-00000-ac552411-0fa6-47c8-b120-4dfcc9227b09-c000.snappy.parquet","size":1125,"isDir":false,"modificationTime":1782477948968,"blockReplication":1,"blockSize":33554432,"action":"add"}&lt;/P&gt;&lt;P&gt;But when I run:&lt;/P&gt;&lt;P&gt;find /opt/spark/app/data -name "*.parquet"&lt;/P&gt;&lt;P&gt;no Parquet files are found, either inside the container or on my host machine. Only the _spark_metadata files exist.&lt;/P&gt;&lt;P&gt;Since the streaming job is processing records successfully and the metadata is being written, I'm now wondering whether this is related to the file sink, filesystem, or Docker volume configuration rather than the upstream pipeline.&lt;/P&gt;&lt;P&gt;Before I start changing the Kafka configuration or thresholds, do you have any thoughts on why Spark would generate metadata entries without the corresponding Parquet files?&lt;/P&gt;</description>
      <pubDate>Fri, 26 Jun 2026 13:18:25 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160671#M54935</guid>
      <dc:creator>VikasM</dc:creator>
      <dc:date>2026-06-26T13:18:25Z</dc:date>
    </item>
    <item>
      <title>Re: PySpark AnalysisException: Ambiguous reference to field t when parsing nested JSON</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160662#M54934</link>
      <description>&lt;P&gt;Thank you, balajij8, for your suggestions. I really appreciate your time and guidance.&lt;/P&gt;&lt;P&gt;I'll try the different configurations you recommended and investigate further. Once I've tested them, I'll come back and share the results.&lt;/P&gt;&lt;P&gt;Thanks again for your help!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;P.S.&amp;nbsp;"Did you see the messages I have already sent... I still don't see them above?"&lt;/P&gt;</description>
      <pubDate>Fri, 26 Jun 2026 12:33:26 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160662#M54934</guid>
      <dc:creator>VikasM</dc:creator>
      <dc:date>2026-06-26T12:33:26Z</dc:date>
    </item>
    <item>
      <title>Re: PySpark AnalysisException: Ambiguous reference to field t when parsing nested JSON</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160660#M54933</link>
      <description>&lt;P&gt;&lt;SPAN&gt;The configuration is correct &amp;amp; mostly upstream is&amp;nbsp;the issue.&amp;nbsp;The Parquet sink can only write files when it receives data from the upstream.&amp;nbsp;&lt;/SPAN&gt;You can validate the 2 key configurations given below&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;P&gt;&lt;STRONG&gt;&lt;SPAN class=""&gt;startingOffsets - latest&lt;/SPAN&gt;&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;- Code skips all historical Kafka data and it only processes messages that arrive after the stream starts. You can set it to&amp;nbsp;&lt;STRONG&gt;&lt;SPAN class=""&gt;earliest&amp;nbsp;&lt;/SPAN&gt;&lt;/STRONG&gt;&lt;SPAN class=""&gt;&amp;amp; validate&lt;/SPAN&gt;&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;&lt;STRONG&gt;WHALE_THRESHOLD_USD 50000&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;- Typical value can be 5 - 10. You can lower the threshold &amp;amp; validate temporarily and set it to 50000 later&lt;/P&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;Even if Kafka has messages, the pipeline filters them out because of these configurations.&lt;/P&gt;</description>
      <pubDate>Fri, 26 Jun 2026 12:12:10 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160660#M54933</guid>
      <dc:creator>balajij8</dc:creator>
      <dc:date>2026-06-26T12:12:10Z</dc:date>
    </item>
    <item>
      <title>Re: Implementing Row Level Security using ABAC</title>
      <link>https://community.databricks.com/t5/data-engineering/implementing-row-level-security-using-abac/m-p/160659#M54932</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/229992"&gt;@Rupa0503&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;
&lt;P&gt;Yes, you can do row-level security across one table or many in Unity Catalog without copying data per role. &lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/210897"&gt;@balajij8&lt;/a&gt;&amp;nbsp; pointed you in the right architectural direction (ABAC with governed tags, a reusable row-filter function, and centrally managed policies). Let me add the official requirements, a couple of corrections, and a simpler option if you only have a few tables.&lt;/P&gt;
&lt;P&gt;The first thing to decide is your path, and it comes down to scale.&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;Path A&lt;/STRONG&gt; is the classic row filter. It's the simplest approach and it's the best fit when you only have a few tables. No tags or policies needed. You attach a UDF directly to each table.&lt;/P&gt;
&lt;PRE&gt;&lt;CODE class="language-sql"&gt;-- Function returns TRUE for rows the caller may see
CREATE OR REPLACE FUNCTION main.default.dept_filter(dept STRING)
RETURN is_account_group_member('data_admins')   -- admins see all
  OR is_account_group_member(dept);              -- else only your dept's rows

-- Attach to each table
ALTER TABLE main.default.employee_data
  SET ROW FILTER main.default.dept_filter ON (department);
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&lt;CODE&gt;is_account_group_member()&lt;/CODE&gt; is the role hook. It evaluates the querying user's group membership at runtime. You repeat the &lt;CODE&gt;ALTER TABLE&lt;/CODE&gt; for each table you want filtered.&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;Path B&lt;/STRONG&gt; is ABAC policies. This is the better fit when you want one definition to govern many tables, including ones that don't exist yet. There are four steps.&lt;/P&gt;
&lt;OL&gt;
&lt;LI&gt;Define governed tags at the account level (Catalog Explorer under tag policies, or the REST API/Terraform) for the attributes that drive access, things like &lt;CODE&gt;department&lt;/CODE&gt;, &lt;CODE&gt;region&lt;/CODE&gt;, and &lt;CODE&gt;sensitivity&lt;/CODE&gt;. One correction here: governed tags are not created with an inline &lt;CODE&gt;CREATE TAG ... VALUES(...)&lt;/CODE&gt; statement. They're managed at the account level, then assigned to objects.&lt;/LI&gt;
&lt;LI&gt;Assign tags to the relevant tables and columns.&lt;/LI&gt;
&lt;/OL&gt;
&lt;PRE&gt;&lt;CODE class="language-sql"&gt;ALTER TABLE main.default.employee_data SET TAGS ('department' = 'hr');
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;OL start="3"&gt;
&lt;LI&gt;Create the row-filter UDF. It returns a BOOLEAN, same as Path A.&lt;/LI&gt;
&lt;LI&gt;Create a policy on the catalog, schema, or table, bound to the tagged objects.&lt;/LI&gt;
&lt;/OL&gt;
&lt;PRE&gt;&lt;CODE class="language-sql"&gt;CREATE POLICY dept_rls
ON SCHEMA main.default
COMMENT 'Users see only their department''s rows'
ROW FILTER main.default.dept_filter
TO `account users` EXCEPT `data_admins`
FOR TABLES
MATCH COLUMNS has_tag('department') AS dept
USING COLUMNS (dept);
&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;The payoff with Path B is that you define it once on the schema or catalog, and any current or future table carrying the &lt;CODE&gt;department&lt;/CODE&gt; tag gets filtered automatically. No per-table wiring.&lt;/P&gt;
&lt;P&gt;A few guardrails to get right before you go to production:&lt;/P&gt;
&lt;OL&gt;
&lt;LI&gt;ABAC adds restrictions on top of access; it doesn't grant it. You still need the normal object-level &lt;CODE&gt;GRANT&lt;/CODE&gt; on the table. ABAC only filters what's visible after access is granted.&lt;/LI&gt;
&lt;LI&gt;Requirements and privileges: supported compute (serverless or DBR 16.4+), &lt;CODE&gt;MANAGE&lt;/CODE&gt; on the securable, and &lt;CODE&gt;EXECUTE&lt;/CODE&gt; on the UDF. ABAC policies are GA as of mid-2026.&lt;/LI&gt;
&lt;LI&gt;Only one distinct row filter can resolve per user and table at runtime. If multiple different filters apply to the same user and table, Databricks errors out. So consolidate your role logic into one reusable function, or make sure your policies are mutually exclusive.&lt;/LI&gt;
&lt;LI&gt;Add an admin or service-account escape hatch in the UDF (the &lt;CODE&gt;data_admins&lt;/CODE&gt; check above), otherwise pipelines and owners can lock themselves out.&lt;/LI&gt;
&lt;LI&gt;For complex role-to-data mappings, have the UDF run an &lt;CODE&gt;EXISTS&lt;/CODE&gt; query against a lookup table (e.g. &lt;CODE&gt;role_access_map&lt;/CODE&gt;) instead of chaining &lt;CODE&gt;is_account_group_member()&lt;/CODE&gt; CASE branches. It's much easier to maintain as your roles grow.&lt;/LI&gt;
&lt;LI&gt;If you also need to hide columns or values (masking an SSN, for example), use the sibling column mask feature. Same ABAC machinery, just &lt;CODE&gt;COLUMN MASK&lt;/CODE&gt; instead of &lt;CODE&gt;ROW FILTER&lt;/CODE&gt;.&lt;/LI&gt;
&lt;/OL&gt;
&lt;P&gt;The short version: for RLS across many tables without duplicating data, use Unity Catalog ABAC with governed tags and a single reusable row-filter UDF. If you only have a few tables, a direct &lt;CODE&gt;SET ROW FILTER&lt;/CODE&gt; gets you there in two statements.&lt;/P&gt;
&lt;P&gt;Docs:&lt;/P&gt;
&lt;OL&gt;
&lt;LI&gt;ABAC in Unity Catalog: &lt;A href="https://docs.databricks.com/aws/en/data-governance/unity-catalog/abac/" target="_blank"&gt;https://docs.databricks.com/aws/en/data-governance/unity-catalog/abac/&lt;/A&gt;&lt;/LI&gt;
&lt;LI&gt;Create and manage row filter and column mask policies: &lt;A href="https://docs.databricks.com/aws/en/data-governance/unity-catalog/abac/policies" target="_blank"&gt;https://docs.databricks.com/aws/en/data-governance/unity-catalog/abac/policies&lt;/A&gt;&lt;/LI&gt;
&lt;LI&gt;Row filters and column masks: &lt;A href="https://docs.databricks.com/aws/en/data-governance/unity-catalog/filters-and-masks" target="_blank"&gt;https://docs.databricks.com/aws/en/data-governance/unity-catalog/filters-and-masks&lt;/A&gt;&lt;/LI&gt;
&lt;LI&gt;Requirements, quotas, and limitations: &lt;A href="https://docs.databricks.com/aws/en/data-governance/unity-catalog/abac/requirements" target="_blank"&gt;https://docs.databricks.com/aws/en/data-governance/unity-catalog/abac/requirements&lt;/A&gt;&lt;/LI&gt;
&lt;/OL&gt;
&lt;P&gt;Cheers, Louis.&lt;/P&gt;</description>
      <pubDate>Fri, 26 Jun 2026 12:10:51 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/implementing-row-level-security-using-abac/m-p/160659#M54932</guid>
      <dc:creator>Louis_Frolio</dc:creator>
      <dc:date>2026-06-26T12:10:51Z</dc:date>
    </item>
    <item>
      <title>Re: PySpark AnalysisException: Ambiguous reference to field t when parsing nested JSON</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160657#M54930</link>
      <description>&lt;P&gt;I have sent a reply to this message 5 times already; I don't know what is going on here.&lt;/P&gt;</description>
      <pubDate>Fri, 26 Jun 2026 11:59:37 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160657#M54930</guid>
      <dc:creator>VikasM</dc:creator>
      <dc:date>2026-06-26T11:59:37Z</dc:date>
    </item>
    <item>
      <title>Re: how to access snapshots in iceberg tables?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-access-snapshots-in-iceberg-tables/m-p/160656#M54929</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/220160"&gt;@gaurang033&lt;/a&gt;&amp;nbsp;, I believe my solution gets you going in the right direction.&amp;nbsp; Please give it a read and let me know.&amp;nbsp; Cheers, Louis.&lt;/P&gt;</description>
      <pubDate>Fri, 26 Jun 2026 11:55:39 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-access-snapshots-in-iceberg-tables/m-p/160656#M54929</guid>
      <dc:creator>Louis_Frolio</dc:creator>
      <dc:date>2026-06-26T11:55:39Z</dc:date>
    </item>
    <item>
      <title>Re: Is there a way to deactivate genie auto corretion</title>
      <link>https://community.databricks.com/t5/data-engineering/is-there-a-way-to-deactivate-genie-auto-corretion/m-p/160655#M54928</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/241502"&gt;@Félix_banqi&lt;/a&gt;,&lt;/P&gt;
&lt;P class="wnfdntt _1ibi0s3f5 _1ibi0s3ce _1ibi0s3ea" data-pm-slice="1 1 []"&gt;Sorry you are facing this issue. That definitely doesn’t sound like the intended experience.&lt;/P&gt;
&lt;P class="wnfdntt _1ibi0s3f5 _1ibi0s3ce _1ibi0s3ea" data-pm-slice="1 1 []"&gt;I would like to understand the issue better to give you a better steer. Is there an example you can share?&lt;/P&gt;
&lt;P class="wnfdntt _1ibi0s3f5 _1ibi0s3ce _1ibi0s3ea"&gt;In the meantime, given that you have already tried the developer settings, a couple of things may help...&lt;/P&gt;
&lt;UL&gt;
&lt;LI class="wnfdntt _1ibi0s3f5 _1ibi0s3ce _1ibi0s3ea"&gt;If you are using Genie Code in Agent mode, it can behave much more autonomously. Databricks documents that Agent mode is designed for multi-step workflows, while Chat mode is better for narrower help, such as explanations and simpler code generation, so switching to Chat mode is often the safest option when you want assistance without many automatic changes. See &lt;A href="https://docs.databricks.com/aws/en/genie-code/use-genie-code" rel="noopener noreferrer nofollow" target="_blank"&gt;Use Genie Code&lt;/A&gt;.&lt;/LI&gt;
&lt;LI class="wnfdntt _1ibi0s3f5 _1ibi0s3ce _1ibi0s3ea"&gt;It's also worth keeping approvals strict. The docs say Genie Code asks for approval before using tools like editing notebooks or running code, and "Ask every time" is the default behaviour. See &lt;A href="https://docs.databricks.com/aws/en/genie-code/use-genie-code#approve-tools" rel="noopener noreferrer nofollow" target="_blank"&gt;Use Genie Code&lt;/A&gt;.&lt;/LI&gt;
&lt;LI class="wnfdntt _1ibi0s3f5 _1ibi0s3ce _1ibi0s3ea"&gt;For notebook/code fixes specifically, Genie Code supports diff-based flows like /fix, where you can accept or reject the proposed change, and accepted code does not automatically run. See &lt;A href="https://docs.databricks.com/aws/en/notebooks/code-assistant" rel="noopener noreferrer nofollow" target="_blank"&gt;Get coding help from Genie Code&lt;/A&gt;.&lt;/LI&gt;
&lt;LI class="wnfdntt _1ibi0s3f5 _1ibi0s3ce _1ibi0s3ea"&gt;Databricks also calls out that results can vary and usually improve when prompts are more explicit... for example, specifying the exact output you want, the library to use, or the format of the answer. See &lt;A href="https://docs.databricks.com/aws/en/genie-code/tips" rel="noopener noreferrer nofollow" target="_blank"&gt;Tips to improve Genie Code responses&lt;/A&gt;.&lt;/LI&gt;
&lt;/UL&gt;
&lt;P class="wnfdntt _1ibi0s3f5 _1ibi0s3ce _1ibi0s3ea"&gt;If you already got into a bad state, another useful point is that Genie Code edits are tracked in revision history, and Databricks says you can roll back changes across notebooks, queries, files, and pipelines. See &lt;A href="https://www.databricks.com/blog/introducing-genie-code" rel="noopener noreferrer nofollow" target="_blank"&gt;Introducing Genie Code&lt;/A&gt;.&lt;/P&gt;
&lt;P class="wnfdntt _1ibi0s3f5 _1ibi0s3ce _1ibi0s3ea"&gt;There is in-product feedback built in... Databricks documents the Useful/Not useful controls directly under Genie Code answers in &lt;A href="https://docs.databricks.com/aws/en/genie-code/use-genie-code#give-feedback-on-genie-code-answers" rel="noopener noreferrer nofollow" target="_blank"&gt;Use Genie Code&lt;/A&gt;. If this is causing code corruption or unexpected edits, I would also recommend raising it through their normal Databricks support channel with as much detail as possible.&lt;/P&gt;
&lt;P class="wnfdntt _1ibi0s3f5 _1ibi0s3ce _1ibi0s3ea"&gt;Once I have additional information (such as an example), I'm happy to raise this internally.&lt;/P&gt;
&lt;P class="wnfdntt _1ibi0s3f5 _1ibi0s3ce _1ibi0s3ea"&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 26 Jun 2026 11:55:13 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/is-there-a-way-to-deactivate-genie-auto-corretion/m-p/160655#M54928</guid>
      <dc:creator>Ashwin_DSA</dc:creator>
      <dc:date>2026-06-26T11:55:13Z</dc:date>
    </item>
    <item>
      <title>Re: PySpark AnalysisException: Ambiguous reference to field t when parsing nested JSON</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160653#M54927</link>
      <description>&lt;P&gt;Thank you, balajij8, for your suggestion about enabling case-sensitive mode. It worked! The process now moves past the previous error, and Spark is successfully consuming data from Kafka.&lt;/P&gt;&lt;P&gt;However, it looks like I've run into another issue. Although the streaming job is consuming the data, it doesn't appear to be writing any Parquet files as expected.&lt;/P&gt;&lt;P&gt;I do see the checkpoint directories being created correctly, both inside the Spark container and on my local machine through the mounted volume, so it seems the streaming queries are running. The only thing missing is the Parquet output.&lt;/P&gt;&lt;P&gt;I'll investigate this next, but if you have any suggestions about what might cause Spark Structured Streaming to create checkpoints without writing any output files, I'd really appreciate your guidance.&lt;/P&gt;&lt;P&gt;The following is my Parquet sink:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;whale_query = (
    whale_df.writeStream
    .queryName("whale_alerts")
    .format("parquet")
    .outputMode("append")
    .option(
        "path",
        "/opt/spark/app/data/whale_alerts"
    )
    .option(
        "checkpointLocation",
        "/opt/spark/app/checkpoints/whale_alerts"
    )
    .trigger(processingTime="10 seconds")
    .start()
)

# ========================================================================
# KLINE PARQUET SINK
# ========================================================================
kline_query = (
    kline_df.writeStream
    .queryName("candlestick_history")
    .format("parquet")
    .outputMode("append")
    .option(
        "path",
        "/opt/spark/app/data/candlesticks"
    )
    .option(
        "checkpointLocation",
        "/opt/spark/app/checkpoints/candlesticks"
    )
    .trigger(processingTime="10 seconds")
    .start()
)

print("🚀 Whale detection pipeline running")
print("🚀 Candlestick pipeline running")

spark.streams.awaitAnyTermination()&lt;/LI-CODE&gt;&lt;P&gt;Thank you again for your help!&lt;/P&gt;</description>
      <pubDate>Fri, 26 Jun 2026 11:54:44 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160653#M54927</guid>
      <dc:creator>VikasM</dc:creator>
      <dc:date>2026-06-26T11:54:44Z</dc:date>
    </item>
    <item>
      <title>Re: PySpark AnalysisException: Ambiguous reference to field t when parsing nested JSON</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160642#M54922</link>
      <description>&lt;P&gt;Do check the other 2 options listed above too - upfront schema setup &amp;amp; field renaming&lt;/P&gt;</description>
      <pubDate>Fri, 26 Jun 2026 11:03:07 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-analysisexception-ambiguous-reference-to-field-t-when/m-p/160642#M54922</guid>
      <dc:creator>balajij8</dc:creator>
      <dc:date>2026-06-26T11:03:07Z</dc:date>
    </item>
  </channel>
</rss>

