<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: org.apache.spark.SparkException: Job aborted due to stage failure during Model Training in Machine Learning</title>
    <link>https://community.databricks.com/t5/machine-learning/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/58896#M2927</link>
    <description>&lt;P&gt;Stage failed because barrier task ResultTask(66, 1) finished unsuccessfully.&lt;/P&gt;&lt;P&gt;ExecutorLostFailure (executor 13 exited unrelated to the running tasks)&lt;/P&gt;&lt;P&gt;Reason: Executor decommission. org.apache.spark.rdd.RDD.collect(RDD.scala:1034) org.apache.spark.api.python.PythonRDD$.collectAndServe(PythonRDD.scala:260) org.apache.spark.api.python.PythonRDD.collectAndServe(PythonRDD.scala) sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) java.lang.reflect.Method.invoke(Method.java:498) py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380) py4j.Gateway.invoke(Gateway.java:295) py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) py4j.commands.CallCommand.execute(CallCommand.java:79) py4j.GatewayConnection.run(GatewayConnection.java:251) java.lang.Thread.run(Thread.java:748)&lt;/P&gt;</description>
    <pubDate>Wed, 31 Jan 2024 21:01:28 GMT</pubDate>
    <dc:creator>VeereshKH</dc:creator>
    <dc:date>2024-01-31T21:01:28Z</dc:date>
    <item>
      <title>org.apache.spark.SparkException: Job aborted due to stage failure during Model Training</title>
      <link>https://community.databricks.com/t5/machine-learning/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/58840#M2922</link>
      <description>&lt;P&gt;&lt;SPAN&gt;org.apache.spark.SparkException: Job aborted due to stage failure: Could not recover from a failed barrier ResultStage. Most recent failure reason: Stage failed because barrier task ResultTask(160, 13) finished unsuccessfully.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 31 Jan 2024 13:10:47 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/58840#M2922</guid>
      <dc:creator>VeereshKH</dc:creator>
      <dc:date>2024-01-31T13:10:47Z</dc:date>
    </item>
    <item>
      <title>Re: org.apache.spark.SparkException: Job aborted due to stage failure during Model Training</title>
      <link>https://community.databricks.com/t5/machine-learning/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/58856#M2925</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/99102"&gt;@VeereshKH&lt;/a&gt;, I hope you are doing well.&lt;/P&gt;
&lt;P&gt;Could you please share the complete error message and also confirm whether you are using any Spark configuration on the cluster?&lt;/P&gt;
&lt;P&gt;We have seen the &lt;EM&gt;"spark.databricks.pyspark.enableProcessIsolation"&lt;/EM&gt; Spark configuration cause this problem in multiple scenarios. If you are using this property, please try removing it and rerunning the code.&lt;/P&gt;
&lt;P&gt;Please keep us posted with the results.&lt;/P&gt;
&lt;P&gt;Thank you and wishing you an amazing day ahead!&lt;/P&gt;</description>
      <pubDate>Wed, 31 Jan 2024 15:41:45 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/58856#M2925</guid>
      <dc:creator>Yeshwanth</dc:creator>
      <dc:date>2024-01-31T15:41:45Z</dc:date>
    </item>
    <item>
      <title>Re: org.apache.spark.SparkException: Job aborted due to stage failure during Model Training</title>
      <link>https://community.databricks.com/t5/machine-learning/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/58877#M2926</link>
      <description>&lt;P&gt;Could you share the stage details where the issue happened?&lt;/P&gt;</description>
      <pubDate>Wed, 31 Jan 2024 18:15:47 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/58877#M2926</guid>
      <dc:creator>Lakshay</dc:creator>
      <dc:date>2024-01-31T18:15:47Z</dc:date>
    </item>
    <item>
      <title>Re: org.apache.spark.SparkException: Job aborted due to stage failure during Model Training</title>
      <link>https://community.databricks.com/t5/machine-learning/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/58896#M2927</link>
      <description>&lt;P&gt;Stage failed because barrier task ResultTask(66, 1) finished unsuccessfully.&lt;/P&gt;&lt;P&gt;ExecutorLostFailure (executor 13 exited unrelated to the running tasks)&lt;/P&gt;&lt;P&gt;Reason: Executor decommission. org.apache.spark.rdd.RDD.collect(RDD.scala:1034) org.apache.spark.api.python.PythonRDD$.collectAndServe(PythonRDD.scala:260) org.apache.spark.api.python.PythonRDD.collectAndServe(PythonRDD.scala) sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) java.lang.reflect.Method.invoke(Method.java:498) py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380) py4j.Gateway.invoke(Gateway.java:295) py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) py4j.commands.CallCommand.execute(CallCommand.java:79) py4j.GatewayConnection.run(GatewayConnection.java:251) java.lang.Thread.run(Thread.java:748)&lt;/P&gt;</description>
      <pubDate>Wed, 31 Jan 2024 21:01:28 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/58896#M2927</guid>
      <dc:creator>VeereshKH</dc:creator>
      <dc:date>2024-01-31T21:01:28Z</dc:date>
    </item>
  </channel>
</rss>

