I am also facing the same issue. I have set `spark.sql.execution.arrow.pyspark.enabled` to `false`, but the error persists. Any idea what's going on? Please help me out.
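For reference, here is how I am applying the config (a minimal sketch of my session setup; the app name is a placeholder):

```python
from pyspark.sql import SparkSession

# Minimal sketch; "my-streaming-job" is a placeholder app name.
spark = (
    SparkSession.builder
    .appName("my-streaming-job")
    .config("spark.sql.execution.arrow.pyspark.enabled", "false")
    .getOrCreate()
)

# Equivalent runtime form:
# spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "false")
```

The full stack trace: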
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 39.0 failed 4 times, most recent failure: Lost task 0.3 in stage 39.0 (TID 3789) (10.132.234.41 executor 39): java.lang.IndexOutOfBoundsException: index: 2147483640, length: 174 (expected: range(0, 2147483648))
at org.apache.arrow.memory.ArrowBuf.checkIndex(ArrowBuf.java:699)
at org.apache.arrow.memory.ArrowBuf.setBytes(ArrowBuf.java:890)
at org.apache.arrow.vector.BaseVariableWidthVector.setSafe(BaseVariableWidthVector.java:1087)
at org.apache.spark.sql.execution.arrow.StringWriter.setValue(ArrowWriter.scala:287)
at org.apache.spark.sql.execution.arrow.ArrowFieldWriter.write(ArrowWriter.scala:151)
at org.apache.spark.sql.execution.arrow.ArrowWriter.write(ArrowWriter.scala:105)
at org.apache.spark.sql.execution.python.ArrowPythonRunner$ArrowWriterThread.$anonfun$writeIteratorToStream$1(ArrowPythonRunner.scala:110)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1657)
at org.apache.spark.sql.execution.python.ArrowPythonRunner$ArrowWriterThread.writeIteratorToStream(ArrowPythonRunner.scala:132)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.$anonfun$run$1(PythonRunner.scala:521)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:2241)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:313)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2873)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2820)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2814)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2814)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1350)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1350)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1350)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3081)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3022)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3010)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
Caused by: java.lang.IndexOutOfBoundsException: index: 2147483640, length: 174 (expected: range(0, 2147483648))
at org.apache.arrow.memory.ArrowBuf.checkIndex(ArrowBuf.java:699)
at org.apache.arrow.memory.ArrowBuf.setBytes(ArrowBuf.java:890)
at org.apache.arrow.vector.BaseVariableWidthVector.setSafe(BaseVariableWidthVector.java:1087)
at org.apache.spark.sql.execution.arrow.StringWriter.setValue(ArrowWriter.scala:287)
at org.apache.spark.sql.execution.arrow.ArrowFieldWriter.write(ArrowWriter.scala:151)
at org.apache.spark.sql.execution.arrow.ArrowWriter.write(ArrowWriter.scala:105)
at org.apache.spark.sql.execution.python.ArrowPythonRunner$ArrowWriterThread.$anonfun$writeIteratorToStream$1(ArrowPythonRunner.scala:110)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1657)
at org.apache.spark.sql.execution.python.ArrowPythonRunner$ArrowWriterThread.writeIteratorToStream(ArrowPythonRunner.scala:132)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.$anonfun$run$1(PythonRunner.scala:521)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:2241)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:313)
=== Streaming Query ===
Identifier: [id = 1f85f00f-6e6f-4b42-b178-0fe871f8ec02, runId = 46d257c6-3992-40bc-9353-7d8bb161925c]
Current Committed Offsets: {}
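One thing I noticed while digging into this: the failing index (2147483640) is right at the 2 GiB boundary (the message's `expected: range(0, 2147483648)` is 2^31 bytes), and the failure goes through `StringWriter` and `BaseVariableWidthVector.setSafe` inside `ArrowPythonRunner`. So it looks like a single Arrow batch of string data is overflowing a 2 GiB buffer while being streamed to a Python worker for a pandas UDF. If I understand correctly, `spark.sql.execution.arrow.pyspark.enabled` only governs the DataFrame-to-pandas conversion path, and pandas UDFs always go through Arrow, which might be why the flag made no difference for me.

As a next step I am thinking of shrinking the Arrow batch size so each batch stays well under 2 GiB. A sketch, assuming the overflow is per batch (the config key is standard Spark SQL; the value 1000 is just a guess based on my row width):

```python
# Cap the rows per Arrow record batch handed to the Python worker.
# Default is 10000; 1000 is a guess for my wide, string-heavy rows.
spark.conf.set("spark.sql.execution.arrow.maxRecordsPerBatch", "1000")
```

Does that sound right, or is there a better workaround?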