<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Databricks streaming job issue with Autoloader for new checkpoint. in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/databricks-streaming-job-issue-with-autoloader-for-new/m-p/11286#M6288</link>
    <description>&lt;P&gt;Hi Team,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I am trying to run a streaming job in Databricks, using the Autoloader approach to read Parquet files from Azure Data Lake Gen2. I have created a new checkpoint, so the first offset is getting created, but the job then fails with this error: "py4j.Py4JException: An exception was raised by the Python Proxy. Return Message: Traceback (most recent call last):" &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;When I expanded that error, I found: &lt;/P&gt;&lt;P&gt;"py4j.protocol.Py4JJavaError: An error occurred while calling o2990.save. : org.apache.spark.SparkException: Job aborted." , "Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 5 in stage 11.0 failed 4 times, most recent failure: Lost task 5.3 in stage 11.0 (TID 115) (172.20.58.133 executor 1): com.databricks.sql.io.FileReadException: Error while reading file /mnt/adl2/kind=data/evolution=2/file_format=parquet/ingestion_date=2022/08/03/13/-13abc.parquet."&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;"Caused by: java.lang.AssertionError: assertion failed"&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;What could be the reason? Please suggest a solution.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Thu, 04 Aug 2022 06:46:38 GMT</pubDate>
    <dc:creator>Himanshi</dc:creator>
    <dc:date>2022-08-04T06:46:38Z</dc:date>
    <item>
      <title>Databricks streaming job issue with Autoloader for new checkpoint.</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-streaming-job-issue-with-autoloader-for-new/m-p/11286#M6288</link>
      <description>&lt;P&gt;Hi Team,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I am trying to run a streaming job in Databricks, using the Autoloader approach to read Parquet files from Azure Data Lake Gen2. I have created a new checkpoint, so the first offset is getting created, but the job then fails with this error: "py4j.Py4JException: An exception was raised by the Python Proxy. Return Message: Traceback (most recent call last):" &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;When I expanded that error, I found: &lt;/P&gt;&lt;P&gt;"py4j.protocol.Py4JJavaError: An error occurred while calling o2990.save. : org.apache.spark.SparkException: Job aborted." , "Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 5 in stage 11.0 failed 4 times, most recent failure: Lost task 5.3 in stage 11.0 (TID 115) (172.20.58.133 executor 1): com.databricks.sql.io.FileReadException: Error while reading file /mnt/adl2/kind=data/evolution=2/file_format=parquet/ingestion_date=2022/08/03/13/-13abc.parquet."&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;"Caused by: java.lang.AssertionError: assertion failed"&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;What could be the reason? Please suggest a solution.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 04 Aug 2022 06:46:38 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-streaming-job-issue-with-autoloader-for-new/m-p/11286#M6288</guid>
      <dc:creator>Himanshi</dc:creator>
      <dc:date>2022-08-04T06:46:38Z</dc:date>
    </item>
  </channel>
</rss>

