<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Concurrent Jobs - The spark driver has stopped unexpectedly! in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8642#M4227</link>
    <description>&lt;P&gt;Yes, I'm monitoring driver memory in Ganglia (Attaching SS of the driver node).&lt;/P&gt;&lt;P&gt;what might be the list of operations are done on the driver side which I need to avoid ?&lt;/P&gt;</description>
    <pubDate>Wed, 26 Apr 2023 11:09:13 GMT</pubDate>
    <dc:creator>JKR</dc:creator>
    <dc:date>2023-04-26T11:09:13Z</dc:date>
    <item>
      <title>Concurrent Jobs - The spark driver has stopped unexpectedly!</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8631#M4216</link>
      <description>&lt;P&gt;Hi, I am running concurrent notebooks in concurrent workflow jobs in job compute cluster c5a.8xlarge with 5-7 worker nodes. Each job has 100 concurrent child notebooks and there are 10 job instances. 8/10 jobs gives the error &lt;B&gt;the spark driver has stopped unexpectedly and is restarting. Your notebook will be automatically reattached. &lt;/B&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;B&gt;How can I resolve that?&lt;/B&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 27 Feb 2023 20:29:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8631#M4216</guid>
      <dc:creator>uzairm</dc:creator>
      <dc:date>2023-02-27T20:29:48Z</dc:date>
    </item>
    <item>
      <title>Re: Concurrent Jobs - The spark driver has stopped unexpectedly!</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8632#M4217</link>
      <description>&lt;P&gt;@uzair mustafa​&amp;nbsp;&lt;/P&gt;&lt;P&gt;Check the Ganglia for performance related issues (maybe it's getting OOM?).&lt;/P&gt;</description>
      <pubDate>Tue, 28 Feb 2023 06:22:43 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8632#M4217</guid>
      <dc:creator>daniel_sahal</dc:creator>
      <dc:date>2023-02-28T06:22:43Z</dc:date>
    </item>
    <item>
      <title>Re: Concurrent Jobs - The spark driver has stopped unexpectedly!</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8633#M4218</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I have been checking Ganglia. Free Space is about 300GB available. &lt;/P&gt;</description>
      <pubDate>Tue, 28 Feb 2023 06:49:51 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8633#M4218</guid>
      <dc:creator>uzairm</dc:creator>
      <dc:date>2023-02-28T06:49:51Z</dc:date>
    </item>
    <item>
      <title>Re: Concurrent Jobs - The spark driver has stopped unexpectedly!</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8634#M4219</link>
      <description>&lt;P&gt;@uzair mustafa​&amp;nbsp;&lt;/P&gt;&lt;P&gt;It's hard to answer without digging into the logs and code.&lt;/P&gt;</description>
      <pubDate>Wed, 01 Mar 2023 06:41:41 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8634#M4219</guid>
      <dc:creator>daniel_sahal</dc:creator>
      <dc:date>2023-03-01T06:41:41Z</dc:date>
    </item>
    <item>
      <title>Re: Concurrent Jobs - The spark driver has stopped unexpectedly!</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8635#M4220</link>
      <description>&lt;P&gt;I have identified the issue. The driver memory is exhausting and the worker nodes are not coming into play...&lt;/P&gt;</description>
      <pubDate>Wed, 01 Mar 2023 07:12:00 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8635#M4220</guid>
      <dc:creator>uzairm</dc:creator>
      <dc:date>2023-03-01T07:12:00Z</dc:date>
    </item>
    <item>
      <title>Re: Concurrent Jobs - The spark driver has stopped unexpectedly!</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8636#M4221</link>
      <description>&lt;P&gt;@uzair mustafa​&amp;nbsp;&lt;/P&gt;&lt;P&gt;So basically, that's what I expected, OOM &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;&lt;P&gt;It's good that you were able to find an issue.&lt;/P&gt;</description>
      <pubDate>Wed, 01 Mar 2023 07:34:28 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8636#M4221</guid>
      <dc:creator>daniel_sahal</dc:creator>
      <dc:date>2023-03-01T07:34:28Z</dc:date>
    </item>
    <item>
      <title>Re: Concurrent Jobs - The spark driver has stopped unexpectedly!</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8637#M4222</link>
      <description>&lt;P&gt;Hi @uzair mustafa​&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Hope everything is going great.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Just wanted to check in if you were able to resolve your issue. If yes, would you be happy to mark an answer as best so that other members can find the solution more quickly? If not, please tell us so we can help you.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Cheers!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 13 Mar 2023 04:53:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8637#M4222</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-03-13T04:53:42Z</dc:date>
    </item>
    <item>
      <title>Re: Concurrent Jobs - The spark driver has stopped unexpectedly!</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8638#M4223</link>
      <description>Hi Vidula,&lt;BR /&gt;I still face the issue and it has not been resolved. It would be great if some one helps me.</description>
      <pubDate>Mon, 13 Mar 2023 05:09:20 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8638#M4223</guid>
      <dc:creator>uzairm</dc:creator>
      <dc:date>2023-03-13T05:09:20Z</dc:date>
    </item>
    <item>
      <title>Re: Concurrent Jobs - The spark driver has stopped unexpectedly!</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8639#M4224</link>
      <description>&lt;P&gt;Did you solve this issue ? I'm in similar situation. &lt;/P&gt;</description>
      <pubDate>Wed, 26 Apr 2023 08:10:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8639#M4224</guid>
      <dc:creator>JKR</dc:creator>
      <dc:date>2023-04-26T08:10:58Z</dc:date>
    </item>
    <item>
      <title>Re: Concurrent Jobs - The spark driver has stopped unexpectedly!</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8640#M4225</link>
      <description>&lt;P&gt;@Jeetash Kumar​&amp;nbsp;I identified the issue, which was the driver memory was getting exhausted. I fine tuned my code so that lesser operations are done on the driver side and I reduced the concurrency of my tasks. This answer is based on my use case. &lt;/P&gt;</description>
      <pubDate>Wed, 26 Apr 2023 10:24:19 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8640#M4225</guid>
      <dc:creator>uzairm</dc:creator>
      <dc:date>2023-04-26T10:24:19Z</dc:date>
    </item>
    <item>
      <title>Re: Concurrent Jobs - The spark driver has stopped unexpectedly!</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8641#M4226</link>
      <description>&lt;P&gt;You can take a look at your driver memory by looking at the Ganglia UI, monitor it as it your cluster runs..&lt;/P&gt;</description>
      <pubDate>Wed, 26 Apr 2023 10:25:02 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8641#M4226</guid>
      <dc:creator>uzairm</dc:creator>
      <dc:date>2023-04-26T10:25:02Z</dc:date>
    </item>
    <item>
      <title>Re: Concurrent Jobs - The spark driver has stopped unexpectedly!</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8642#M4227</link>
      <description>&lt;P&gt;Yes, I'm monitoring driver memory in Ganglia (Attaching SS of the driver node).&lt;/P&gt;&lt;P&gt;what might be the list of operations are done on the driver side which I need to avoid ?&lt;/P&gt;</description>
      <pubDate>Wed, 26 Apr 2023 11:09:13 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8642#M4227</guid>
      <dc:creator>JKR</dc:creator>
      <dc:date>2023-04-26T11:09:13Z</dc:date>
    </item>
    <item>
      <title>Re: Concurrent Jobs - The spark driver has stopped unexpectedly!</title>
      <link>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8643#M4228</link>
      <description>Operations like collect() select() are done on driver node. All aggregations. Avoid those.</description>
      <pubDate>Wed, 26 Apr 2023 11:11:27 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/concurrent-jobs-the-spark-driver-has-stopped-unexpectedly/m-p/8643#M4228</guid>
      <dc:creator>uzairm</dc:creator>
      <dc:date>2023-04-26T11:11:27Z</dc:date>
    </item>
  </channel>
</rss>

