<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: While loading Data from blob to delta lake facing below issue in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/while-loading-data-from-blob-to-delta-lake-facing-below-issue/m-p/16531#M10707</link>
    <description>&lt;P&gt;@chaitanya, could you please try disabling arrow optimization and see if this resolves the issue?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;spark.sql.execution.arrow.enabled false&lt;/P&gt;&lt;P&gt;spark.sql.execution.arrow.pyspark.enabled false&lt;/P&gt;</description>
    <pubDate>Thu, 06 Jan 2022 22:37:31 GMT</pubDate>
    <dc:creator>shan_chandra</dc:creator>
    <dc:date>2022-01-06T22:37:31Z</dc:date>
    <item>
      <title>While loading Data from blob to delta lake facing below issue</title>
      <link>https://community.databricks.com/t5/data-engineering/while-loading-data-from-blob-to-delta-lake-facing-below-issue/m-p/16528#M10704</link>
      <description>&lt;P&gt;&lt;/P&gt;
&lt;P&gt;I'm calling a stored procedure, storing the result in a pandas DataFrame, and then creating a list from it. While creating the list, I get the error below:&lt;/P&gt;
&lt;P&gt;Databricks execution failed with error state Terminated. For more details please check the run page url: path&lt;/P&gt;
&lt;P&gt;An error occurred while calling o3036.collectToPython. : org.apache.spark.SparkException: Job aborted due to stage failure: Task 66 in stage 11629.0 failed 4 times, most recent failure: Lost task 66.3 in stage 11629.0 (TID 421473, 10.49.20.9, executor 7): java.util.NoSuchElementException at org.apache.spark.sql.vectorized.ColumnarBatch$1.next(ColumnarBatch.java:69) at org.apache.spark.sql.vectorized.ColumnarBatch$1.next(ColumnarBatch.java:58) at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:44) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$2.next(ArrowConverters.scala:214) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$2.next(ArrowConverters.scala:195) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:733) at org.apache.spark.sql.execution.collect.UnsafeRowBatchUtils$.encodeUnsafeRows(UnsafeRowBatchUtils.scala:80) at org.apache.spark.sql.execution.collect.Collector.$anonfun$processPartition$1(Collector.scala:179) at org.apache.spark.SparkContext.$anonfun$runJob$6(SparkContext.scala:2433) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.doRunTask(Task.scala:144) at org.apache.spark.scheduler.Task.run(Task.scala:117) at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$9(Executor.scala:640) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1581) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:643) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748)&lt;/P&gt;
&lt;P&gt;Driver stacktrace: at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2519) at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2466) at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2460) at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2460) at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1152) at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1152) at scala.Option.foreach(Option.scala:407) at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1152) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2721) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2668) at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2656) at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:938) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2339) at org.apache.spark.SparkContext.runJob(SparkContext.scala:2434) at org.apache.spark.sql.execution.collect.Collector.runSparkJobs(Collector.scala:273) at org.apache.spark.sql.execution.collect.Collector.collect(Collector.scala:308) at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:82) at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:88) at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResult(ResultCacheManager.scala:508) at 
org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResult(ResultCacheManager.scala:480) at org.apache.spark.sql.execution.SparkPlan.executeCollectResult(SparkPlan.scala:401) at org.apache.spark.sql.Dataset.$anonfun$collectToPython$1(Dataset.scala:3497) at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3709) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$5(SQLExecution.scala:116) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:249) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:101) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:845) at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:77) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:199) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3707) at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3495) at sun.reflect.GeneratedMethodAccessor271.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380) at py4j.Gateway.invoke(Gateway.java:295) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.GatewayConnection.run(GatewayConnection.java:251) at java.lang.Thread.run(Thread.java:748) Caused by: java.util.NoSuchElementException at org.apache.spark.sql.vectorized.ColumnarBatch$1.next(ColumnarBatch.java:69) at org.apache.spark.sql.vectorized.ColumnarBatch$1.next(ColumnarBatch.java:58) at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:44) at 
org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$2.next(ArrowConverters.scala:214) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$2.next(ArrowConverters.scala:195) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:733) at org.apache.spark.sql.execution.collect.UnsafeRowBatchUtils$.encodeUnsafeRows(UnsafeRowBatchUtils.scala:80) at org.apache.spark.sql.execution.collect.Collector.$anonfun$processPartition$1(Collector.scala:179) at org.apache.spark.SparkContext.$anonfun$runJob$6(SparkContext.scala:2433) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.doRunTask(Task.scala:144) at org.apache.spark.scheduler.Task.run(Task.scala:117) at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$9(Executor.scala:640) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1581) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:643) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ... 1 more &lt;/P&gt; 
&lt;P&gt;&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 13 Aug 2021 15:27:54 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/while-loading-data-from-blob-to-delta-lake-facing-below-issue/m-p/16528#M10704</guid>
      <dc:creator>chaitanya</dc:creator>
      <dc:date>2021-08-13T15:27:54Z</dc:date>
    </item>
    <item>
      <title>Re: While loading Data from blob to delta lake facing below issue</title>
      <link>https://community.databricks.com/t5/data-engineering/while-loading-data-from-blob-to-delta-lake-facing-below-issue/m-p/16530#M10706</link>
      <description>&lt;P&gt;Hi @chaitanya,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Please provide more details.&lt;/P&gt;&lt;P&gt;Could you share the code you tried to run? What format was the source data from blob storage?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thank you.&lt;/P&gt;</description>
      <pubDate>Mon, 20 Sep 2021 17:57:45 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/while-loading-data-from-blob-to-delta-lake-facing-below-issue/m-p/16530#M10706</guid>
      <dc:creator>jose_gonzalez</dc:creator>
      <dc:date>2021-09-20T17:57:45Z</dc:date>
    </item>
    <item>
      <title>Re: While loading Data from blob to delta lake facing below issue</title>
      <link>https://community.databricks.com/t5/data-engineering/while-loading-data-from-blob-to-delta-lake-facing-below-issue/m-p/16531#M10707</link>
      <description>&lt;P&gt;@chaitanya, could you please try disabling arrow optimization and see if this resolves the issue?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;spark.sql.execution.arrow.enabled false&lt;/P&gt;&lt;P&gt;spark.sql.execution.arrow.pyspark.enabled false&lt;/P&gt;</description>
      <pubDate>Thu, 06 Jan 2022 22:37:31 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/while-loading-data-from-blob-to-delta-lake-facing-below-issue/m-p/16531#M10707</guid>
      <dc:creator>shan_chandra</dc:creator>
      <dc:date>2022-01-06T22:37:31Z</dc:date>
    </item>
  </channel>
</rss>

