<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Error when reading delta lake files with Auto Loader in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/6811#M2822</link>
    <description>&lt;P&gt;You can check for yourself:&lt;/P&gt;&lt;P&gt;&lt;A href="https://learn.microsoft.com/en-us/azure/databricks/ingestion/auto-loader/" target="_blank"&gt;https://learn.microsoft.com/en-us/azure/databricks/ingestion/auto-loader/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;I&gt;"Auto Loader can ingest JSON, CSV, PARQUET, AVRO, ORC, TEXT, and BINARYFILE file formats"&lt;/I&gt;&lt;/P&gt;&lt;P&gt;And it makes sense.  Autoloader is a tool to identify what you have already processed.&lt;/P&gt;&lt;P&gt;Delta lake is more than just some files, it has a transaction log.&lt;/P&gt;</description>
    <pubDate>Thu, 30 Mar 2023 13:14:48 GMT</pubDate>
    <dc:creator>-werners-</dc:creator>
    <dc:date>2023-03-30T13:14:48Z</dc:date>
    <item>
      <title>Error when reading delta lake files with Auto Loader</title>
      <link>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/6808#M2819</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;When reading a Delta Lake file (created by Auto Loader) with this code: df = (&lt;/P&gt;&lt;P&gt; &amp;nbsp; &amp;nbsp;spark.readStream&lt;/P&gt;&lt;P&gt; &amp;nbsp;&amp;nbsp;&amp;nbsp;.format('cloudFiles')&lt;/P&gt;&lt;P&gt; &amp;nbsp;&amp;nbsp;&amp;nbsp;.option("cloudFiles.format", "delta")&lt;/P&gt;&lt;P&gt; &amp;nbsp;&amp;nbsp;&amp;nbsp;.option("cloudFiles.schemaLocation", f"{silver_path}/_checkpoint")&lt;/P&gt;&lt;P&gt; &amp;nbsp;&amp;nbsp;&amp;nbsp;.load(bronze_path)&lt;/P&gt;&lt;P&gt; &amp;nbsp; &amp;nbsp; )&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I receive this error:&lt;/P&gt;&lt;P&gt;AnalysisException: Incompatible format detected. A transaction log for Delta was found at `/mnt/f1/f2/_delta_log`, but you are trying to read from `/mnt/f1/f2/` using format("cloudFiles"). You must use 'format("delta")' when reading and writing to a delta table. To disable this check, SET spark.databricks.delta.formatCheck.enabled=false To learn more about Delta...&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;What's the right way of reading Delta Lake files with Auto Loader for further processing (e.g. from Bronze layer to Silver)? &lt;/P&gt;&lt;P&gt;Thank you!&lt;/P&gt;</description>
      <pubDate>Thu, 30 Mar 2023 04:43:41 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/6808#M2819</guid>
      <dc:creator>Vladif1</dc:creator>
      <dc:date>2023-03-30T04:43:41Z</dc:date>
    </item>
    <item>
      <title>Re: Error when reading delta lake files with Auto Loader</title>
      <link>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/6809#M2820</link>
      <description>&lt;P&gt;As the error mentions: autoloader and delta do not mix.&lt;/P&gt;&lt;P&gt;But there is change data feed on delta lake (as a source):&lt;/P&gt;&lt;P&gt;&lt;A href="https://learn.microsoft.com/en-us/azure/databricks/delta/delta-change-data-feed" target="_blank"&gt;https://learn.microsoft.com/en-us/azure/databricks/delta/delta-change-data-feed&lt;/A&gt;&lt;/P&gt;&lt;P&gt;That way you do not have to read the whole delta table but only ingest changes.&lt;/P&gt;</description>
      <pubDate>Thu, 30 Mar 2023 10:45:37 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/6809#M2820</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2023-03-30T10:45:37Z</dc:date>
    </item>
    <item>
      <title>Re: Error when reading delta lake files with Auto Loader</title>
      <link>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/6810#M2821</link>
      <description>&lt;P&gt;&lt;/P&gt;&lt;P&gt;Autoloader doesn't support reading from Delta Lake tables? Any other format is supported except delta?&lt;/P&gt;&lt;P&gt;Thank you! &lt;/P&gt;</description>
      <pubDate>Thu, 30 Mar 2023 12:40:49 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/6810#M2821</guid>
      <dc:creator>Vladif1</dc:creator>
      <dc:date>2023-03-30T12:40:49Z</dc:date>
    </item>
    <item>
      <title>Re: Error when reading delta lake files with Auto Loader</title>
      <link>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/6811#M2822</link>
      <description>&lt;P&gt;You can check for yourself:&lt;/P&gt;&lt;P&gt;&lt;A href="https://learn.microsoft.com/en-us/azure/databricks/ingestion/auto-loader/" target="_blank"&gt;https://learn.microsoft.com/en-us/azure/databricks/ingestion/auto-loader/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;I&gt;"Auto Loader can ingest JSON, CSV, PARQUET, AVRO, ORC, TEXT, and BINARYFILE file formats"&lt;/I&gt;&lt;/P&gt;&lt;P&gt;And it makes sense.  Autoloader is a tool to identify what you have already processed.&lt;/P&gt;&lt;P&gt;Delta lake is more than just some files, it has a transaction log.&lt;/P&gt;</description>
      <pubDate>Thu, 30 Mar 2023 13:14:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/6811#M2822</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2023-03-30T13:14:48Z</dc:date>
    </item>
    <item>
      <title>Re: Error when reading delta lake files with Auto Loader</title>
      <link>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/6812#M2823</link>
      <description>&lt;P&gt;Hi @Vlad Feigin​&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Hope everything is going great.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Just wanted to check in if you were able to resolve your issue. If yes, would you be happy to mark an answer as best so that other members can find the solution more quickly? If not, please tell us so we can help you.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Cheers!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 01 Apr 2023 02:20:43 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/6812#M2823</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-04-01T02:20:43Z</dc:date>
    </item>
    <item>
      <title>Re: Error when reading delta lake files with Auto Loader</title>
      <link>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/94124#M38817</link>
      <description>&lt;P&gt;Autoloader can't read delta lake.&lt;BR /&gt;To use Delta Lake one can use Change Data Feed (with or without streaming).&lt;BR /&gt;&lt;A href="https://docs.databricks.com/en/ingestion/cloud-object-storage/auto-loader/options.html" target="_blank"&gt;https://docs.databricks.com/en/ingestion/cloud-object-storage/auto-loader/options.html&lt;/A&gt;&lt;BR /&gt;And it makes sense: one needs to process the delta lake log to know what files contain the actual data and read deletion vectors.&lt;/P&gt;</description>
      <pubDate>Tue, 15 Oct 2024 14:25:07 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/94124#M38817</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2024-10-15T14:25:07Z</dc:date>
    </item>
    <item>
      <title>Re: Error when reading delta lake files with Auto Loader</title>
      <link>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/94126#M38818</link>
      <description>&lt;P&gt;The Delta Executor is a powerful tool designed to streamline the execution of data processing tasks in cloud environments. It enhances performance by optimizing resource utilization and provides a flexible framework for managing complex workflows. With its support for various data formats and integration with popular data storage solutions, users can easily implement scalable solutions. Additionally, the&lt;A href="https://deltaexecutor-apk.com/" target="_self"&gt; Delta Executor&lt;/A&gt; ensures data consistency and reliability through transaction support. This makes it an essential component for modern data engineering and analytics pipelines.&lt;/P&gt;</description>
      <pubDate>Tue, 15 Oct 2024 14:26:07 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/94126#M38818</guid>
      <dc:creator>Saqlain12</dc:creator>
      <dc:date>2024-10-15T14:26:07Z</dc:date>
    </item>
    <item>
      <title>Re: Error when reading delta lake files with Auto Loader</title>
      <link>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/94215#M38838</link>
      <description>&lt;P&gt;Thanks for sharing this useful link:&amp;nbsp;&lt;A href="https://docs.databricks.com/en/ingestion/cloud-object-storage/auto-loader/options.html" target="_blank" rel="nofollow noopener noreferrer"&gt;https://docs.databricks.com/en/ingestion/cloud-object-storage/auto-loader/options.html&lt;/A&gt;&lt;A href="https://deltaexecutorapp.com/" target="_blank"&gt;/&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 15 Oct 2024 23:21:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/94215#M38838</guid>
      <dc:creator>Johni1</dc:creator>
      <dc:date>2024-10-15T23:21:58Z</dc:date>
    </item>
    <item>
      <title>Re: Error when reading delta lake files with Auto Loader</title>
      <link>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/94217#M38840</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/58454"&gt;@Vladif1&lt;/a&gt;&amp;nbsp;The error occurs because the cloudFiles format in Auto Loader is meant for reading raw file formats like CSV, JSON, etc. for ingestion; for more details, see &lt;A href="https://docs.databricks.com/en/ingestion/cloud-object-storage/auto-loader/options.html" target="_self"&gt;Format Support&lt;/A&gt;. For Delta tables, you should use the Delta format directly.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;#Sample Example

bronze_path = "/mnt/bronze_layer"
silver_path = "/mnt/silver_layer"

raw_df = (
    spark.readStream
    .format("cloudFiles")
    .option("cloudFiles.format", "json")
    .option("cloudFiles.schemaLocation", f"{bronze_path}/_schema_checkpoint")
    .load("/mnt/raw_data_path")
)

(raw_df
    .writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", f"{bronze_path}/_checkpoint")
    .start(bronze_path)
)

bronze_df = (
    spark.readStream
    .format("delta")  # Delta format for reading
    .load(bronze_path)  # Path to Bronze Delta table
)

# Perform any necessary transformations for the Silver layer.

silver_df = bronze_df.withColumn("processed_timestamp", current_timestamp())

# Write the transformed data to the Silver layer
(silver_df
    .writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", f"{silver_path}/_checkpoint")
    .start(silver_path)
)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 16 Oct 2024 00:09:31 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-when-reading-delta-lake-files-with-auto-loader/m-p/94217#M38840</guid>
      <dc:creator>Panda</dc:creator>
      <dc:date>2024-10-16T00:09:31Z</dc:date>
    </item>
  </channel>
</rss>

