<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 188.0 failed in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/38888#M26804</link>
    <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/85392"&gt;@rchauhan&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;This error appears when we try to read the data from SQL server using a single connection. I would suggest to use numPartitions, lowerBound and upperBound configs to parallelize your data read.&lt;/P&gt;&lt;P&gt;You can find a detailed documentation here -&amp;nbsp;&lt;A href="https://docs.databricks.com/en/external-data/jdbc.html#:~:text=save()%0A)-,Control%20parallelism%20for%20JDBC%20queries,-By%20default%2C%20the" target="_blank"&gt;https://docs.databricks.com/en/external-data/jdbc.html#:~:text=save()%0A)-,Control%20parallelism%20for%20JDBC%20queries,-By%20default%2C%20the&lt;/A&gt;&lt;/P&gt;</description>
    <pubDate>Wed, 02 Aug 2023 07:06:51 GMT</pubDate>
    <dc:creator>Tharun-Kumar</dc:creator>
    <dc:date>2023-08-02T07:06:51Z</dc:date>
    <item>
      <title>org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 188.0 failed 4</title>
      <link>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/38875#M26798</link>
      <description>&lt;P&gt;&lt;SPAN&gt;When I am trying to read the data from sql server through jdbc connect , I get the below error while merging the data into databricks table . Can you please help whats the issue related to?&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 188.0 failed 4 times, most recent failure: Lost task 1.3 in stage 188.0 (TID 1823) (10###.#&lt;/SPAN&gt;&lt;SPAN&gt;.# executor 9): ExecutorLostFailure (executor 9 exited caused by one of the running tasks) Reason: Command exited with code 50 Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:3376) at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:3308) at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:3299) at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:3299) at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1428) at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1428) at scala.Option.foreach(Option.scala:407) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1428) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3588) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3526) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3514) at 
org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:51)&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 01 Aug 2023 23:58:02 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/38875#M26798</guid>
      <dc:creator>rchauhan</dc:creator>
      <dc:date>2023-08-01T23:58:02Z</dc:date>
    </item>
    <item>
      <title>Re: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 188.0 failed</title>
      <link>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/38888#M26804</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/85392"&gt;@rchauhan&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;This error appears when we try to read the data from SQL server using a single connection. I would suggest to use numPartitions, lowerBound and upperBound configs to parallelize your data read.&lt;/P&gt;&lt;P&gt;You can find a detailed documentation here -&amp;nbsp;&lt;A href="https://docs.databricks.com/en/external-data/jdbc.html#:~:text=save()%0A)-,Control%20parallelism%20for%20JDBC%20queries,-By%20default%2C%20the" target="_blank"&gt;https://docs.databricks.com/en/external-data/jdbc.html#:~:text=save()%0A)-,Control%20parallelism%20for%20JDBC%20queries,-By%20default%2C%20the&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 02 Aug 2023 07:06:51 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/38888#M26804</guid>
      <dc:creator>Tharun-Kumar</dc:creator>
      <dc:date>2023-08-02T07:06:51Z</dc:date>
    </item>
    <item>
      <title>Re: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 188.0 failed</title>
      <link>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/38958#M26825</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/39403"&gt;@Tharun-Kumar&lt;/a&gt;&amp;nbsp;. I am already using&amp;nbsp;&lt;SPAN&gt;numPartitions, lowerBound and upperBound configs to parallelize my data read. Still I see the same error.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;&lt;SPAN class=""&gt;df=spark.read.option("numPartitions", 32).option("fetchSize", "1000").option("partitionColumn", "Key").option("lowerBound", min_o).option("upperBound", max_o).jdbc(url=jdbcUrl,table=f"({query_attr}) t ",properties=connectionProperties)&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 02 Aug 2023 17:58:35 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/38958#M26825</guid>
      <dc:creator>rchauhan</dc:creator>
      <dc:date>2023-08-02T17:58:35Z</dc:date>
    </item>
    <item>
      <title>Re: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 188.0 failed</title>
      <link>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/64531#M32598</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/85392"&gt;@rchauhan&lt;/a&gt;&amp;nbsp;did you find a solution to the problem or know what settings caused the problem?&lt;/P&gt;</description>
      <pubDate>Mon, 25 Mar 2024 15:38:00 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/64531#M32598</guid>
      <dc:creator>MDV</dc:creator>
      <dc:date>2024-03-25T15:38:00Z</dc:date>
    </item>
  </channel>
</rss>

