<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: org.apache.spark.SparkException: Job aborted due to stage failure: Authorized committer failed while pushing dataframe to azure cosmos db. in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/7697#M3489</link>
    <description>&lt;P&gt;Hi, could you please post the full text of the error (not a screenshot)?&lt;/P&gt;&lt;P&gt;Please tag&amp;nbsp;&lt;A href="https://community.databricks.com/s/profile/0053f000000WWwvAAG" alt="https://community.databricks.com/s/profile/0053f000000WWwvAAG" target="_blank"&gt;@Debayan&lt;/A&gt;&amp;nbsp;in your next response so that I am notified. Thank you!&lt;/P&gt;</description>
    <pubDate>Thu, 16 Mar 2023 06:10:11 GMT</pubDate>
    <dc:creator>Debayan</dc:creator>
    <dc:date>2023-03-16T06:10:11Z</dc:date>
    <item>
      <title>org.apache.spark.SparkException: Job aborted due to stage failure: Authorized committer failed while pushing dataframe to azure cosmos db.</title>
      <link>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/7696#M3488</link>
      <description>&lt;P&gt;I am writing data to Azure Cosmos DB with the OLTP connector, using the code below.&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;cfg["spark.cosmos.write.strategy"] = "ItemOverwrite"
json_df.write.format("cosmos.oltp").options(**cfg).mode("append").save()&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;I am getting the error below.&lt;/P&gt;&lt;P&gt;[Two screenshots of the error were attached here; the full error text is posted in a reply below.]&lt;/P&gt;&lt;P&gt;Please let me know if you have faced a similar issue or have a fix.&lt;/P&gt;&lt;P&gt;Feel free to ask for more details.&lt;/P&gt;
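&lt;P&gt;For reference, a self-contained version of this write path looks roughly like the sketch below. It assumes the azure-cosmos-spark OLTP connector; the account endpoint, key, database, and container values are placeholders.&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;# Minimal sketch of the Cosmos DB OLTP write path; all account values are placeholders.
cfg = {
    "spark.cosmos.accountEndpoint": "https://YOUR_ACCOUNT.documents.azure.com:443/",
    "spark.cosmos.accountKey": "YOUR_ACCOUNT_KEY",
    "spark.cosmos.database": "YOUR_DATABASE",
    "spark.cosmos.container": "YOUR_CONTAINER",
    # Upsert items by id rather than failing on conflicts
    "spark.cosmos.write.strategy": "ItemOverwrite",
}

json_df.write.format("cosmos.oltp").options(**cfg).mode("append").save()&lt;/CODE&gt;&lt;/PRE&gt;</description>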
      <pubDate>Wed, 15 Mar 2023 13:14:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/7696#M3488</guid>
      <dc:creator>manasa</dc:creator>
      <dc:date>2023-03-15T13:14:42Z</dc:date>
    </item>
    <item>
      <title>Re: org.apache.spark.SparkException: Job aborted due to stage failure: Authorized committer failed while pushing dataframe to azure cosmos db.</title>
      <link>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/7698#M3490</link>
      <description>&lt;P&gt;Hi @Debayan Mukherjee​&amp;nbsp;&lt;/P&gt;&lt;P&gt;Here is the error message&lt;/P&gt;&lt;P&gt;An error occurred while calling o1616.save.&lt;/P&gt;&lt;P&gt;: org.apache.spark.SparkException: Job aborted due to stage failure: Authorized committer (attemptNumber=0, stage=1306, partition=0) failed; but task commit success, data duplication may happen. reason=TaskKilled(preempted by scheduler,Vector(AccumulableInfo(61880,None,Some(248166),None,false,true,None), AccumulableInfo(61882,None,Some(0),None,false,true,None), AccumulableInfo(61883,None,Some(430),None,false,true,None), AccumulableInfo(61901,None,Some(19155841),None,false,true,None), AccumulableInfo(61902,None,Some(4240),None,false,true,None), AccumulableInfo(61903,None,Some(134710060),None,false,true,None), AccumulableInfo(61904,None,Some(19155841),None,false,true,None), AccumulableInfo(61909,None,Some(0),None,false,true,None), AccumulableInfo(61910,None,Some(19155841),None,false,true,None), AccumulableInfo(61911,None,Some(8),None,false,true,None), AccumulableInfo(61912,None,Some(395),None,false,true,None), AccumulableInfo(61913,None,Some(0),None,false,true,None), AccumulableInfo(61914,None,Some(0),None,false,true,None)),Vector(LongAccumulator(id: 61880, name: Some(internal.metrics.executorRunTime), value: 248166), LongAccumulator(id: 61882, name: Some(internal.metrics.resultSize), value: 0), LongAccumulator(id: 61883, name: Some(internal.metrics.jvmGCTime), value: 430), LongAccumulator(id: 61901, name: Some(internal.metrics.input.bytesRead), value: 19155841), LongAccumulator(id: 61902, name: Some(internal.metrics.input.recordsRead), value: 4240), LongAccumulator(id: 61903, name: Some(internal.metrics.input.sampledTimeReadNano), value: 134710060), LongAccumulator(id: 61904, name: Some(internal.metrics.input.sampledBytesRead), value: 19155841), LongAccumulator(id: 61909, name: Some(internal.metrics.io.requestBytesCount), value: 0), LongAccumulator(id: 61910, name: Some(internal.metrics.io.responseBytesCount), value: 19155841), LongAccumulator(id: 61911, name: Some(internal.metrics.io.requestCount), value: 8), LongAccumulator(id: 61912, name: Some(internal.metrics.io.requestMsDuration), value: 395), LongAccumulator(id: 61913, name: Some(internal.metrics.io.retryCount), value: 0), LongAccumulator(id: 61914, name: Some(internal.metrics.io.retryDelayMsDuration), value: 0)),WrappedArray(1757792296, 367759552, 0, 0, 4755036, 0, 4755036, 0, 14168641, 0, 0, 0, 0, 0, 0, 0, 64, 4546, 7, 3349, 7895))&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:3334)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:3266)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:3257)&lt;/P&gt;&lt;P&gt;	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)&lt;/P&gt;&lt;P&gt;	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)&lt;/P&gt;&lt;P&gt;	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:3257)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleStageFailed$1(DAGScheduler.scala:1418)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleStageFailed$1$adapted(DAGScheduler.scala:1418)&lt;/P&gt;&lt;P&gt;	at scala.Option.foreach(Option.scala:407)&lt;/P&gt;&lt;P&gt;	
at org.apache.spark.scheduler.DAGScheduler.handleStageFailed(DAGScheduler.scala:1418)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3543)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3484)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3472)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:51)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGScheduler.$anonfun$runJob$1(DAGScheduler.scala:1172)&lt;/P&gt;&lt;P&gt;	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:80)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1160)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.SparkContext.runJobInternal(SparkContext.scala:2731)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2714)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2(WriteToDataSourceV2Exec.scala:388)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec.writeWithV2$(WriteToDataSourceV2Exec.scala:364)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.datasources.v2.AppendDataExec.writeWithV2(WriteToDataSourceV2Exec.scala:250)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run(WriteToDataSourceV2Exec.scala:343)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.datasources.v2.V2ExistingTableWriteExec.run$(WriteToDataSourceV2Exec.scala:342)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.datasources.v2.AppendDataExec.run(WriteToDataSourceV2Exec.scala:250)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.$anonfun$result$1(V2CommandExec.scala:47)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:80)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:47)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:45)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:54)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution$$anonfun$$nestedInanonfun$eagerlyExecuteCommands$1$1.$anonfun$applyOrElse$3(QueryExecution.scala:238)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:153)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution$$anonfun$$nestedInanonfun$eagerlyExecuteCommands$1$1.$anonfun$applyOrElse$2(QueryExecution.scala:238)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$8(SQLExecution.scala:227)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:410)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withCustomExecutionEnv$1(SQLExecution.scala:172)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:1035)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:122)&lt;/P&gt;&lt;P&gt;	at 
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:360)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution$$anonfun$$nestedInanonfun$eagerlyExecuteCommands$1$1.$anonfun$applyOrElse$1(QueryExecution.scala:237)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution.org$apache$spark$sql$execution$QueryExecution$$withMVTagsIfNecessary(QueryExecution.scala:220)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution$$anonfun$$nestedInanonfun$eagerlyExecuteCommands$1$1.applyOrElse(QueryExecution.scala:233)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution$$anonfun$$nestedInanonfun$eagerlyExecuteCommands$1$1.applyOrElse(QueryExecution.scala:226)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:519)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:106)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:519)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:31)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:316)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:312)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:31)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:495)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution.$anonfun$eagerlyExecuteCommands$1(QueryExecution.scala:226)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.allowInvokingTransformsInAnalyzer(AnalysisHelper.scala:372)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:226)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:180)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:171)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:287)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:964)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:346)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:258)&lt;/P&gt;&lt;P&gt;	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)&lt;/P&gt;&lt;P&gt;	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)&lt;/P&gt;&lt;P&gt;	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)&lt;/P&gt;&lt;P&gt;	at java.lang.reflect.Method.invoke(Method.java:498)&lt;/P&gt;&lt;P&gt;	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)&lt;/P&gt;&lt;P&gt;	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)&lt;/P&gt;&lt;P&gt;	at py4j.Gateway.invoke(Gateway.java:306)&lt;/P&gt;&lt;P&gt;	at 
py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)&lt;/P&gt;&lt;P&gt;	at py4j.commands.CallCommand.execute(CallCommand.java:79)&lt;/P&gt;&lt;P&gt;	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:195)&lt;/P&gt;&lt;P&gt;	at py4j.ClientServerConnection.run(ClientServerConnection.java:115)&lt;/P&gt;&lt;P&gt;	at java.lang.Thread.run(Thread.java:750)&lt;/P&gt;</description>
      <pubDate>Thu, 16 Mar 2023 06:20:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/7698#M3490</guid>
      <dc:creator>manasa</dc:creator>
      <dc:date>2023-03-16T06:20:42Z</dc:date>
    </item>
    <item>
      <title>Re: org.apache.spark.SparkException: Job aborted due to stage failure: Authorized committer failed while pushing dataframe to azure cosmos db.</title>
      <link>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/7699#M3491</link>
      <description>&lt;P&gt;Hi @Manasa Kalluri&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you for your question! To assist you better, please take a moment to review the answer and let me know whether it best fits your needs.&lt;/P&gt;&lt;P&gt;If it does, please help us select the best solution by clicking "Select As Best".&lt;/P&gt;&lt;P&gt;Your feedback helps us ensure that we are providing the best possible service to you. Thank you!&lt;/P&gt;</description>
      <pubDate>Sun, 19 Mar 2023 04:47:21 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/7699#M3491</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-03-19T04:47:21Z</dc:date>
    </item>
    <item>
      <title>Re: org.apache.spark.SparkException: Job aborted due to stage failure: Authorized committer failed while pushing dataframe to azure cosmos db.</title>
      <link>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/7697#M3489</link>
      <description>&lt;P&gt;Hi, could you please post the full text of the error (not a screenshot)?&lt;/P&gt;&lt;P&gt;Please tag&amp;nbsp;&lt;A href="https://community.databricks.com/s/profile/0053f000000WWwvAAG" alt="https://community.databricks.com/s/profile/0053f000000WWwvAAG" target="_blank"&gt;@Debayan&lt;/A&gt;&amp;nbsp;in your next response so that I am notified. Thank you!&lt;/P&gt;</description>
      <pubDate>Thu, 16 Mar 2023 06:10:11 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/7697#M3489</guid>
      <dc:creator>Debayan</dc:creator>
      <dc:date>2023-03-16T06:10:11Z</dc:date>
    </item>
    <item>
      <title>Re: org.apache.spark.SparkException: Job aborted due to stage failure: Authorized committer failed while pushing dataframe to azure cosmos db.</title>
      <link>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/7700#M3492</link>
      <description>&lt;P&gt;Hi, I do not see an exact root cause in the error; it primarily looks like a connectivity issue between the control plane and the data plane. It would be best to raise a case with Databricks support so that we can triage it.&lt;/P&gt;
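&lt;P&gt;One thing that may be worth ruling out before (or while) filing the case: the trace reports that the committing task was killed with reason=TaskKilled(preempted by scheduler), i.e. a task that had already been authorized to commit was killed. A minimal sketch of that check is below; the Databricks preemption flag is an assumption and should be verified against your runtime's documentation.&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;# The committer failed because its task was killed ("preempted by scheduler")
# after being authorized to commit. Speculative execution and (on Databricks)
# task preemption are common sources of such kills; disabling them for this
# job is a low-risk way to rule out this class of failure.
# Add to the cluster's Spark config (Cluster &amp;gt; Advanced options &amp;gt; Spark),
# then restart the cluster and rerun the write:

spark.speculation false
spark.databricks.preemption.enabled false&lt;/CODE&gt;&lt;/PRE&gt;</description>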
      <pubDate>Mon, 20 Mar 2023 16:09:54 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/7700#M3492</guid>
      <dc:creator>Debayan</dc:creator>
      <dc:date>2023-03-20T16:09:54Z</dc:date>
    </item>
  </channel>
</rss>