<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Python worker exited unexpectedly (crashed) in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/python-worker-exited-unexpectedly-crashed/m-p/97014#M39769</link>
    <description>&lt;P&gt;I have a failing pipeline which results in the following failure:&lt;/P&gt;&lt;DIV&gt;org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 2053.0 failed 4 times, most recent failure: Lost task 0.3 in stage 2053.0 (TID 4594) (10.171.199.129 executor 0): org.apache.spark.SparkException: Python worker exited unexpectedly (crashed)&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handleExceptionUnderIsolation(PythonRunner.scala:596)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:607)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:600)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:38)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.sql.execution.python.BasePythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:120)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.sql.execution.python.BasePythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:98)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:507)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4.processNext(Unknown Source)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:50)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.shuffle.sort.UnsafeShuffleWriter.write(UnsafeShuffleWriter.java:195)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:56)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$3(ShuffleMapTask.scala:92)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$1(ShuffleMapTask.scala:87)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:58)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:39)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:211)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at 
org.apache.spark.scheduler.Task.doRunTask(Task.scala:199)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.scheduler.Task.$anonfun$run$5(Task.scala:161)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:51)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at com.databricks.unity.HandleImpl.runWith(UCSHandle.scala:104)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at com.databricks.unity.HandleImpl.$anonfun$runWithAndClose$1(UCSHandle.scala:109)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at scala.util.Using$.resource(Using.scala:269)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at com.databricks.unity.HandleImpl.runWithAndClose(UCSHandle.scala:108)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:155)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.scheduler.Task.run(Task.scala:102)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$10(Executor.scala:1036)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:110)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:1039)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at 
com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:926)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at java.lang.Thread.run(Thread.java:750)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;Caused by: java.io.IOException: Connection reset by peer&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at sun.nio.ch.FileDispatcherImpl.write0(Native Method)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at sun.nio.ch.IOUtil.write(IOUtil.java:51)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:470)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderInputStream.writeAdditionalInputToPythonWorker(PythonRunner.scala:859)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderInputStream.read(PythonRunner.scala:755)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at java.io.BufferedInputStream.read(BufferedInputStream.java:265)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at java.io.DataInputStream.readInt(DataInputStream.java:387)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at 
org.apache.spark.sql.execution.python.BasePythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:106)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;... 41 more&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;Driver stacktrace:&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGScheduler.$anonfun$failJobAndIndependentStages$1(DAGScheduler.scala:3998)&lt;/DIV&gt;&lt;DIV&gt;at scala.Option.getOrElse(Option.scala:189)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:3996)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:3910)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:3897)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:3897)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1758)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1741)&lt;/DIV&gt;&lt;DIV&gt;at scala.Option.foreach(Option.scala:407)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1741)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:4256)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:4159)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:4145)&lt;/DIV&gt;&lt;DIV&gt;at 
org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:55)&lt;/DIV&gt;&lt;DIV&gt;org.apache.spark.SparkException: Python worker exited unexpectedly (crashed)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handleExceptionUnderIsolation(PythonRunner.scala:596)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:607)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:600)&lt;/DIV&gt;&lt;DIV&gt;at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:38)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.execution.python.BasePythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:120)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.execution.python.BasePythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:98)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:507)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4.processNext(null:-1)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:50)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/DIV&gt;&lt;DIV&gt;at 
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.shuffle.sort.UnsafeShuffleWriter.write(UnsafeShuffleWriter.java:195)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:56)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$3(ShuffleMapTask.scala:92)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$1(ShuffleMapTask.scala:87)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:58)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:39)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:211)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.Task.doRunTask(Task.scala:199)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.Task.$anonfun$run$5(Task.scala:161)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:51)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.unity.HandleImpl.runWith(UCSHandle.scala:104)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.unity.HandleImpl.$anonfun$runWithAndClose$1(UCSHandle.scala:109)&lt;/DIV&gt;&lt;DIV&gt;at scala.util.Using$.resource(Using.scala:269)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.unity.HandleImpl.runWithAndClose(UCSHandle.scala:108)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:155)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.Task.run(Task.scala:102)&lt;/DIV&gt;&lt;DIV&gt;at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$10(Executor.scala:1036)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:1039)&lt;/DIV&gt;&lt;DIV&gt;at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:926)&lt;/DIV&gt;&lt;DIV&gt;at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)&lt;/DIV&gt;&lt;DIV&gt;at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)&lt;/DIV&gt;&lt;DIV&gt;at java.lang.Thread.run(Thread.java:750)&lt;/DIV&gt;&lt;DIV&gt;java.io.IOException: Connection reset by peer&lt;/DIV&gt;&lt;DIV&gt;at sun.nio.ch.FileDispatcherImpl.write0(FileDispatcherImpl.java:-2)&lt;/DIV&gt;&lt;DIV&gt;at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)&lt;/DIV&gt;&lt;DIV&gt;at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)&lt;/DIV&gt;&lt;DIV&gt;at sun.nio.ch.IOUtil.write(IOUtil.java:51)&lt;/DIV&gt;&lt;DIV&gt;at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:470)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderInputStream.writeAdditionalInputToPythonWorker(PythonRunner.scala:859)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderInputStream.read(PythonRunner.scala:755)&lt;/DIV&gt;&lt;DIV&gt;at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)&lt;/DIV&gt;&lt;DIV&gt;at 
java.io.BufferedInputStream.read(BufferedInputStream.java:265)&lt;/DIV&gt;&lt;DIV&gt;at java.io.DataInputStream.readInt(DataInputStream.java:387)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.execution.python.BasePythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:106)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.execution.python.BasePythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:98)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:507)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4.processNext(null:-1)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:50)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.shuffle.sort.UnsafeShuffleWriter.write(UnsafeShuffleWriter.java:195)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:56)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$3(ShuffleMapTask.scala:92)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at 
org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$1(ShuffleMapTask.scala:87)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:58)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:39)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:211)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.Task.doRunTask(Task.scala:199)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.Task.$anonfun$run$5(Task.scala:161)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:51)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.unity.HandleImpl.runWith(UCSHandle.scala:104)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.unity.HandleImpl.$anonfun$runWithAndClose$1(UCSHandle.scala:109)&lt;/DIV&gt;&lt;DIV&gt;at scala.util.Using$.resource(Using.scala:269)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.unity.HandleImpl.runWithAndClose(UCSHandle.scala:108)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:155)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.Task.run(Task.scala:102)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$10(Executor.scala:1036)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:1039)&lt;/DIV&gt;&lt;DIV&gt;at 
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:926)&lt;/DIV&gt;&lt;DIV&gt;at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)&lt;/DIV&gt;&lt;DIV&gt;at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)&lt;/DIV&gt;&lt;DIV&gt;at java.lang.Thread.run(Thread.java:750)&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;I have tried to add more workers and a larger cluster, but haven't had any luck. Would appreciate any advice.&lt;/DIV&gt;</description>
    <pubDate>Thu, 31 Oct 2024 15:33:17 GMT</pubDate>
    <dc:creator>zsh24</dc:creator>
    <dc:date>2024-10-31T15:33:17Z</dc:date>
    <item>
      <title>Python worker exited unexpectedly (crashed)</title>
      <link>https://community.databricks.com/t5/data-engineering/python-worker-exited-unexpectedly-crashed/m-p/97014#M39769</link>
      <description>&lt;P&gt;I have a failing pipeline which results in the following failure:&lt;/P&gt;&lt;DIV&gt;org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 2053.0 failed 4 times, most recent failure: Lost task 0.3 in stage 2053.0 (TID 4594) (10.171.199.129 executor 0): org.apache.spark.SparkException: Python worker exited unexpectedly (crashed)&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handleExceptionUnderIsolation(PythonRunner.scala:596)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:607)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:600)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:38)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.sql.execution.python.BasePythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:120)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.sql.execution.python.BasePythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:98)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:507)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4.processNext(Unknown Source)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:50)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.shuffle.sort.UnsafeShuffleWriter.write(UnsafeShuffleWriter.java:195)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:56)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$3(ShuffleMapTask.scala:92)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$1(ShuffleMapTask.scala:87)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:58)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:39)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:211)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at 
org.apache.spark.scheduler.Task.doRunTask(Task.scala:199)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.scheduler.Task.$anonfun$run$5(Task.scala:161)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:51)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at com.databricks.unity.HandleImpl.runWith(UCSHandle.scala:104)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at com.databricks.unity.HandleImpl.$anonfun$runWithAndClose$1(UCSHandle.scala:109)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at scala.util.Using$.resource(Using.scala:269)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at com.databricks.unity.HandleImpl.runWithAndClose(UCSHandle.scala:108)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:155)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.scheduler.Task.run(Task.scala:102)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$10(Executor.scala:1036)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:110)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:1039)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at 
com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:926)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at java.lang.Thread.run(Thread.java:750)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;Caused by: java.io.IOException: Connection reset by peer&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at sun.nio.ch.FileDispatcherImpl.write0(Native Method)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at sun.nio.ch.IOUtil.write(IOUtil.java:51)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:470)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderInputStream.writeAdditionalInputToPythonWorker(PythonRunner.scala:859)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderInputStream.read(PythonRunner.scala:755)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at java.io.BufferedInputStream.read(BufferedInputStream.java:265)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at java.io.DataInputStream.readInt(DataInputStream.java:387)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;at 
org.apache.spark.sql.execution.python.BasePythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:106)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;... 41 more&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;Driver stacktrace:&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGScheduler.$anonfun$failJobAndIndependentStages$1(DAGScheduler.scala:3998)&lt;/DIV&gt;&lt;DIV&gt;at scala.Option.getOrElse(Option.scala:189)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:3996)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:3910)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:3897)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:3897)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1758)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1741)&lt;/DIV&gt;&lt;DIV&gt;at scala.Option.foreach(Option.scala:407)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1741)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:4256)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:4159)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:4145)&lt;/DIV&gt;&lt;DIV&gt;at 
org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:55)&lt;/DIV&gt;&lt;DIV&gt;org.apache.spark.SparkException: Python worker exited unexpectedly (crashed)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handleExceptionUnderIsolation(PythonRunner.scala:596)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:607)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator$$anonfun$1.applyOrElse(PythonRunner.scala:600)&lt;/DIV&gt;&lt;DIV&gt;at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:38)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.execution.python.BasePythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:120)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.execution.python.BasePythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:98)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:507)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4.processNext(null:-1)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:50)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/DIV&gt;&lt;DIV&gt;at 
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.shuffle.sort.UnsafeShuffleWriter.write(UnsafeShuffleWriter.java:195)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:56)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$3(ShuffleMapTask.scala:92)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$1(ShuffleMapTask.scala:87)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:58)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:39)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:211)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.Task.doRunTask(Task.scala:199)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.Task.$anonfun$run$5(Task.scala:161)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:51)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.unity.HandleImpl.runWith(UCSHandle.scala:104)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.unity.HandleImpl.$anonfun$runWithAndClose$1(UCSHandle.scala:109)&lt;/DIV&gt;&lt;DIV&gt;at scala.util.Using$.resource(Using.scala:269)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.unity.HandleImpl.runWithAndClose(UCSHandle.scala:108)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:155)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.Task.run(Task.scala:102)&lt;/DIV&gt;&lt;DIV&gt;at 
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$10(Executor.scala:1036)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:1039)&lt;/DIV&gt;&lt;DIV&gt;at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:926)&lt;/DIV&gt;&lt;DIV&gt;at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)&lt;/DIV&gt;&lt;DIV&gt;at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)&lt;/DIV&gt;&lt;DIV&gt;at java.lang.Thread.run(Thread.java:750)&lt;/DIV&gt;&lt;DIV&gt;java.io.IOException: Connection reset by peer&lt;/DIV&gt;&lt;DIV&gt;at sun.nio.ch.FileDispatcherImpl.write0(FileDispatcherImpl.java:-2)&lt;/DIV&gt;&lt;DIV&gt;at sun.nio.ch.SocketDispatcher.write(SocketDispatcher.java:47)&lt;/DIV&gt;&lt;DIV&gt;at sun.nio.ch.IOUtil.writeFromNativeBuffer(IOUtil.java:93)&lt;/DIV&gt;&lt;DIV&gt;at sun.nio.ch.IOUtil.write(IOUtil.java:51)&lt;/DIV&gt;&lt;DIV&gt;at sun.nio.ch.SocketChannelImpl.write(SocketChannelImpl.java:470)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderInputStream.writeAdditionalInputToPythonWorker(PythonRunner.scala:859)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderInputStream.read(PythonRunner.scala:755)&lt;/DIV&gt;&lt;DIV&gt;at java.io.BufferedInputStream.fill(BufferedInputStream.java:246)&lt;/DIV&gt;&lt;DIV&gt;at 
java.io.BufferedInputStream.read(BufferedInputStream.java:265)&lt;/DIV&gt;&lt;DIV&gt;at java.io.DataInputStream.readInt(DataInputStream.java:387)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.execution.python.BasePythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:106)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.execution.python.BasePythonUDFRunner$$anon$2.read(PythonUDFRunner.scala:98)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:507)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4.processNext(null:-1)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:50)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/DIV&gt;&lt;DIV&gt;at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.shuffle.sort.UnsafeShuffleWriter.write(UnsafeShuffleWriter.java:195)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:56)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$3(ShuffleMapTask.scala:92)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at 
org.apache.spark.scheduler.ShuffleMapTask.$anonfun$runTask$1(ShuffleMapTask.scala:87)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:58)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:39)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:211)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.Task.doRunTask(Task.scala:199)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.Task.$anonfun$run$5(Task.scala:161)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:51)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.unity.HandleImpl.runWith(UCSHandle.scala:104)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.unity.HandleImpl.$anonfun$runWithAndClose$1(UCSHandle.scala:109)&lt;/DIV&gt;&lt;DIV&gt;at scala.util.Using$.resource(Using.scala:269)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.unity.HandleImpl.runWithAndClose(UCSHandle.scala:108)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:155)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.scheduler.Task.run(Task.scala:102)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$10(Executor.scala:1036)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:1039)&lt;/DIV&gt;&lt;DIV&gt;at 
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)&lt;/DIV&gt;&lt;DIV&gt;at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/DIV&gt;&lt;DIV&gt;at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:926)&lt;/DIV&gt;&lt;DIV&gt;at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)&lt;/DIV&gt;&lt;DIV&gt;at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)&lt;/DIV&gt;&lt;DIV&gt;at java.lang.Thread.run(Thread.java:750)&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;I have tried to add more workers and a larger cluster, but haven't had any luck. Would appreciate any advice.&lt;/DIV&gt;</description>
      <pubDate>Thu, 31 Oct 2024 15:33:17 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/python-worker-exited-unexpectedly-crashed/m-p/97014#M39769</guid>
      <dc:creator>zsh24</dc:creator>
      <dc:date>2024-10-31T15:33:17Z</dc:date>
    </item>
    <item>
      <title>Re: Python worker exited unexpectedly (crashed)</title>
      <link>https://community.databricks.com/t5/data-engineering/python-worker-exited-unexpectedly-crashed/m-p/99862#M40118</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/130644"&gt;@zsh24&lt;/a&gt;&amp;nbsp;what you're seeing is a wrapper exception. The underlying, true exception, if it is not in the stdout log, can be found on the Python side. To understand what is failing in the Python worker (not the executor JVM), you should analyze the code executed in Python.&lt;/P&gt;</description>
      <pubDate>Sat, 23 Nov 2024 18:31:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/python-worker-exited-unexpectedly-crashed/m-p/99862#M40118</guid>
      <dc:creator>VZLA</dc:creator>
      <dc:date>2024-11-23T18:31:42Z</dc:date>
    </item>
    <item>
      <title>Re: Python worker exited unexpectedly (crashed)</title>
      <link>https://community.databricks.com/t5/data-engineering/python-worker-exited-unexpectedly-crashed/m-p/102956#M41282</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/130644"&gt;@zsh24&lt;/a&gt;&amp;nbsp;,&lt;BR /&gt;This is a generic wrapper exception. Please share the more detailed error. You can get that error by logging it to the driver using Driver Logging, or by storing these logs in a DB table. Basically, fire an INSERT INTO SQL statement in the &lt;STRONG&gt;Exception&lt;/STRONG&gt; block with all the details, and you can then fetch the error from that table.&lt;/P&gt;</description>
      <pubDate>Mon, 23 Dec 2024 07:07:26 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/python-worker-exited-unexpectedly-crashed/m-p/102956#M41282</guid>
      <dc:creator>MujtabaNoori</dc:creator>
      <dc:date>2024-12-23T07:07:26Z</dc:date>
    </item>
    <item>
      <title>Re: Python worker exited unexpectedly (crashed)</title>
      <link>https://community.databricks.com/t5/data-engineering/python-worker-exited-unexpectedly-crashed/m-p/103039#M41304</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/130644"&gt;@zsh24&lt;/a&gt;&amp;nbsp;, just checking if you were able to address the problem or need further guidance?&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 23 Dec 2024 16:03:15 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/python-worker-exited-unexpectedly-crashed/m-p/103039#M41304</guid>
      <dc:creator>VZLA</dc:creator>
      <dc:date>2024-12-23T16:03:15Z</dc:date>
    </item>
  </channel>
</rss>

