<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Unable to read files using Auto Loader in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/unable-to-read-files-using-auto-loader/m-p/153639#M53981</link>
    <description>&lt;P&gt;Thanks&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/89873"&gt;@BalaS&lt;/a&gt;&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/24053"&gt;@lingareddy_Alva&lt;/a&gt;&amp;nbsp;for your quick responses.&lt;/P&gt;&lt;P&gt;I've updated the schema location to:&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;option&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"schemaLocation"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;"/Volumes/workspace/capstone/schema"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;and checkpoint location to:&amp;nbsp;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;/Volumes/workspace/capstone/checkpoint/1/&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;however, I'm still getting the same error. I'm using Databricks free version to develop a test pipeline.&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;</description>
    <pubDate>Tue, 07 Apr 2026 16:08:58 GMT</pubDate>
    <dc:creator>AanchalSoni</dc:creator>
    <dc:date>2026-04-07T16:08:58Z</dc:date>
    <item>
      <title>Unable to read files using Auto Loader</title>
      <link>https://community.databricks.com/t5/data-engineering/unable-to-read-files-using-auto-loader/m-p/153616#M53977</link>
      <description>&lt;P&gt;Hi!&lt;/P&gt;&lt;P&gt;I'm trying to create an ETL pipeline. It reads data from a UC volume, however, Databricks is not allowing me to do so. The following error is generated:&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;AnalysisException: [RequestId=a11e017b-61db-4c30-a03a-d7cce55e5aea ErrorClass=INVALID_PARAMETER_VALUE.LOCATION_OVERLAP] Input path url 's3://dbstorage-prod-6ubki/uc/670643ac-88ac-4f51-8bb0-2311c001fab6/6b491f6f-d67e-44fe-9e04-bad30ec7a8cc/__unitystorage/catalogs/5f4192b5-79f2-415f-bfe8-729b201e40b9/tables/ea03463f-90af-4941-b2a6-47782054b3c9/_dlt_metadata/_autoloader' overlaps with managed storage within 'CheckPathAccess' call. .&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;Is it not possible to read directly from a volume using Auto Loader? Should the raw files be read from an external location only? Please guide.&lt;/P&gt;</description>
      <pubDate>Tue, 07 Apr 2026 12:14:08 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unable-to-read-files-using-auto-loader/m-p/153616#M53977</guid>
      <dc:creator>AanchalSoni</dc:creator>
      <dc:date>2026-04-07T12:14:08Z</dc:date>
    </item>
    <item>
      <title>Re: Unable to read files using Auto Loader</title>
      <link>https://community.databricks.com/t5/data-engineering/unable-to-read-files-using-auto-loader/m-p/153627#M53978</link>
      <description>&lt;P&gt;You can absolutely use Auto Loader with files from volume. The issue is a path conflict in your case. Managed areas of a table&amp;nbsp;or volume are not to be touched to ensure data integrity and security governed by UC.&lt;/P&gt;&lt;H3&gt;&lt;FONT size="3"&gt;&lt;SPAN&gt;You can use the Unity Catalog Volume path in the Auto Loader.&amp;nbsp;&lt;SPAN&gt;Here is the Auto Loader implementation using the recommended Volume path. This ensures the conflicts are avoided.&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/H3&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;# Defined Schema (Ensure this matches your JSON structure)&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;PRE&gt;schema = &lt;SPAN class=""&gt;"id INT"

df = (spark.readStream
    .&lt;SPAN class=""&gt;format(&lt;SPAN class=""&gt;"cloudFiles")
    .option(&lt;SPAN class=""&gt;"cloudFiles.format", &lt;SPAN class=""&gt;"json")
    .option(&lt;SPAN class=""&gt;"cloudFiles.schemaLocation", &lt;SPAN class=""&gt;"/Volumes/workspace/default/sys/schema")
    .load(&lt;SPAN class=""&gt;"/Volumes/workspace/dev/input/") &lt;SPAN class=""&gt;# UC Volume Path
    .writeStream
    .&lt;SPAN class=""&gt;format(&lt;SPAN class=""&gt;"delta")
    .option(&lt;SPAN class=""&gt;"checkpointLocation", &lt;SPAN class=""&gt;"/Volumes/workspace/default/sys/checkpoint")
    .option(&lt;SPAN class=""&gt;"mergeSchema", &lt;SPAN class=""&gt;"true")
    .trigger(availableNow=&lt;SPAN class=""&gt;True)
    .toTable(&lt;SPAN class=""&gt;"uc.default.json_files"))&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/PRE&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Tue, 07 Apr 2026 14:48:08 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unable-to-read-files-using-auto-loader/m-p/153627#M53978</guid>
      <dc:creator>balajij8</dc:creator>
      <dc:date>2026-04-07T14:48:08Z</dc:date>
    </item>
    <item>
      <title>Re: Unable to read files using Auto Loader</title>
      <link>https://community.databricks.com/t5/data-engineering/unable-to-read-files-using-auto-loader/m-p/153628#M53979</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/184264"&gt;@AanchalSoni&lt;/a&gt;&amp;nbsp;.&lt;/P&gt;&lt;P&gt;This is a well-known Unity Catalog constraint. Let me explain in detail.&lt;/P&gt;&lt;P&gt;The error &lt;STRONG&gt;INVALID_PARAMETER_VALUE.LOCATION_OVERLAP&lt;/STRONG&gt; is thrown because &lt;STRONG&gt;Auto Loader's checkpoint/schema location overlaps with UC-managed storage&lt;/STRONG&gt;. Specifically:&lt;BR /&gt;&amp;nbsp; &amp;nbsp; UC Volumes are backed by managed S3 paths under Databricks' internal storage &lt;STRONG&gt;(dbstorage-prod-*/uc/.../)&lt;/STRONG&gt;.&lt;BR /&gt;&amp;nbsp; &amp;nbsp; Auto Loader writes its &lt;STRONG&gt;_dlt_metadata/_autoloader&lt;/STRONG&gt; checkpoint directory into that same managed path space.&lt;BR /&gt;&amp;nbsp; &amp;nbsp; UC's &lt;STRONG&gt;CheckPathAccess&lt;/STRONG&gt; guard explicitly blocks any process from writing into managed storage paths it doesn't own — including Auto Loader's internal bookkeeping.&lt;/P&gt;&lt;P&gt;This is not a permissions issue you can grant your way out of. It's a &lt;STRONG&gt;hard architectural constraint&lt;/STRONG&gt; in Unity Catalog.&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;The Fix: Separate the Checkpoint Location&lt;/STRONG&gt;&lt;BR /&gt;You don't need to move your source files to an external location. You just need to point the &lt;STRONG&gt;checkpoint and schema location&lt;/STRONG&gt; somewhere outside UC-managed storage.&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Option 1 — External Location (Recommended for Production)&lt;/STRONG&gt;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;df = (
    spark.readStream
    .format("cloudFiles")
    .option("cloudFiles.format", "parquet")           # or json, csv, etc.
    .option("cloudFiles.schemaLocation", "s3://your-external-bucket/checkpoints/schema/pipeline_x")
    .load("/Volumes/your_catalog/&amp;lt;schema&amp;gt;/&amp;lt;volume&amp;gt;/raw/")  # UC Volume path — fine here
    .writeStream
    .option("checkpointLocation", "s3://your-external-bucket/checkpoints/pipeline_x")
    .table("your_catalog.&amp;lt;schema&amp;gt;.target_table")
)&lt;/LI-CODE&gt;&lt;P&gt;The external bucket must be registered as a &lt;STRONG&gt;UC External Location&lt;/STRONG&gt; with &lt;STRONG&gt;CREATE EXTERNAL LOCATION&lt;/STRONG&gt; and appropriate storage credentials.&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Option 2 — Use DLT (Cleanest for UC)&lt;/STRONG&gt;&lt;BR /&gt;DLT manages its own checkpoint state completely outside your control path, so you never hit this conflict:&lt;/P&gt;&lt;LI-CODE lang="python"&gt;import dlt

@dlt.table
def bronze_raw():
    return (
        spark.readStream
        .format("cloudFiles")
        .option("cloudFiles.format", "parquet")
        .option("cloudFiles.schemaLocation",
                "/Volumes/your_catalog/&amp;lt;schema&amp;gt;/&amp;lt;volume&amp;gt;/autoloader_schema/")
        .load("/Volumes/your_catalog/&amp;lt;schema&amp;gt;/&amp;lt;volume&amp;gt;/raw/")
    )&lt;/LI-CODE&gt;&lt;P&gt;&lt;BR /&gt;Note that with DLT, the &lt;STRONG&gt;schemaLocation&lt;/STRONG&gt; can live inside the Volume (it's only the checkpoint that conflicts, not the schema inference directory in all cases — though keeping it external is cleaner).&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;Summary Recommendation&lt;/STRONG&gt;&lt;BR /&gt;Your source files staying in the UC Volume is perfectly fine and correct. The only change needed is routing your &lt;STRONG&gt;checkpointLocation&lt;/STRONG&gt; and &lt;STRONG&gt;schemaLocation&lt;/STRONG&gt; to a registered UC External Location on S3. If this pipeline is already in a DLT context (given your medallion setup in your catalog), the DLT option is the cleanest path with zero checkpoint management overhead.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 07 Apr 2026 15:03:02 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unable-to-read-files-using-auto-loader/m-p/153628#M53979</guid>
      <dc:creator>lingareddy_Alva</dc:creator>
      <dc:date>2026-04-07T15:03:02Z</dc:date>
    </item>
    <item>
      <title>Re: Unable to read files using Auto Loader</title>
      <link>https://community.databricks.com/t5/data-engineering/unable-to-read-files-using-auto-loader/m-p/153639#M53981</link>
      <description>&lt;P&gt;Thanks&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/89873"&gt;@BalaS&lt;/a&gt;&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/24053"&gt;@lingareddy_Alva&lt;/a&gt;&amp;nbsp;for your quick responses.&lt;/P&gt;&lt;P&gt;I've updated the schema location to:&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;option&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"schemaLocation"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;"/Volumes/workspace/capstone/schema"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;and checkpoint location to:&amp;nbsp;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;/Volumes/workspace/capstone/checkpoint/1/&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;however, I'm still getting the same error. I'm using Databricks free version to develop a test pipeline.&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Tue, 07 Apr 2026 16:08:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unable-to-read-files-using-auto-loader/m-p/153639#M53981</guid>
      <dc:creator>AanchalSoni</dc:creator>
      <dc:date>2026-04-07T16:08:58Z</dc:date>
    </item>
    <item>
      <title>Re: Unable to read files using Auto Loader</title>
      <link>https://community.databricks.com/t5/data-engineering/unable-to-read-files-using-auto-loader/m-p/153640#M53982</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;You need to set schemaLocation in the following way (don't omit the cloudFiles prefix)&lt;/P&gt;&lt;P&gt;&lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;SPAN class=""&gt;option&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN class=""&gt;"cloudFiles.schemaLocation"&lt;/SPAN&gt;&lt;SPAN class=""&gt;,&lt;/SPAN&gt; &lt;SPAN class=""&gt;"&amp;lt;path-to-schema&amp;gt;"&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 07 Apr 2026 16:37:36 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unable-to-read-files-using-auto-loader/m-p/153640#M53982</guid>
      <dc:creator>szymon_dybczak</dc:creator>
      <dc:date>2026-04-07T16:37:36Z</dc:date>
    </item>
    <item>
      <title>Re: Unable to read files using Auto Loader</title>
      <link>https://community.databricks.com/t5/data-engineering/unable-to-read-files-using-auto-loader/m-p/153645#M53985</link>
      <description>&lt;P&gt;Hello&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/110502"&gt;@szymon_dybczak&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;That's the root cause right there — Databricks Free Edition.&lt;BR /&gt;Even with corrected schemaLocation and checkpointLocation paths, the Free Edition has a fundamental constraint:&lt;BR /&gt;So no matter where inside a Volume you point your checkpoint, it still lands in UC-managed storage, and the CheckPathAccess guard fires.&lt;/P&gt;&lt;P&gt;Only the checkpointLocation needs to go to DBFS on Free Edition. schemaLocation can stay in your Volume.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;df = (
    spark.readStream
    .format("cloudFiles")
    .option("cloudFiles.format", "csv")
    .option("cloudFiles.schemaLocation", "/Volumes/workspace/capstone/schema/")  # Volume is fine
    .load("/Volumes/workspace/capstone/raw/")
    .writeStream
    .option("checkpointLocation", "dbfs:/tmp/checkpoints/capstone")              # DBFS needed
    .toTable("workspace.capstone.target_table")
)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 07 Apr 2026 17:05:34 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unable-to-read-files-using-auto-loader/m-p/153645#M53985</guid>
      <dc:creator>lingareddy_Alva</dc:creator>
      <dc:date>2026-04-07T17:05:34Z</dc:date>
    </item>
  </channel>
</rss>

