<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: org.apache.spark.SparkException: Job aborted due to stage failure: Total size of serialized results of 69 tasks (4.0 GB) is bigger than spark.driver.maxResultSize (4.0 GB) in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/16103#M10316</link>
    <description>&lt;P&gt;Hi @sachinmkp1@gmail.com&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;You need to add this Spark configuration at your cluster level, not at the notebook level. When you add it to the cluster level it will apply the settings properly. For more details on this issue, please check our knowledge base article &lt;A href="https://kb.databricks.com/jobs/job-fails-maxresultsize-exception.html" alt="https://kb.databricks.com/jobs/job-fails-maxresultsize-exception.html" target="_blank"&gt;https://kb.databricks.com/jobs/job-fails-maxresultsize-exception.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thank you.&lt;/P&gt;</description>
    <pubDate>Thu, 16 Sep 2021 17:24:14 GMT</pubDate>
    <dc:creator>jose_gonzalez</dc:creator>
    <dc:date>2021-09-16T17:24:14Z</dc:date>
    <item>
      <title>org.apache.spark.SparkException: Job aborted due to stage failure: Total size of serialized results of 69 tasks (4.0 GB) is bigger than spark.driver.maxResultSize (4.0 GB)</title>
      <link>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/16101#M10314</link>
      <description>&lt;P&gt;&lt;/P&gt;
&lt;P&gt;set spark.conf.set("spark.driver.maxResultSize", "20g")&lt;/P&gt;
&lt;P&gt;get spark.conf.get("spark.driver.maxResultSize") // 20g which is expected in notebook , I did not do in cluster level setting&lt;/P&gt;
&lt;P&gt;still getting 4g while executing the spark job , why?&lt;/P&gt;
&lt;P&gt;because of this job is getting failed.&lt;/P&gt; 
&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 23 Aug 2021 14:48:52 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/16101#M10314</guid>
      <dc:creator>sachinmkp1</dc:creator>
      <dc:date>2021-08-23T14:48:52Z</dc:date>
    </item>
    <item>
      <title>Re: org.apache.spark.SparkException: Job aborted due to stage failure: Total size of serialized results of 69 tasks (4.0 GB) is bigger than spark.driver.maxResultSize (4.0 GB)</title>
      <link>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/16102#M10315</link>
      <description>&lt;P&gt;&lt;/P&gt;
&lt;P&gt;question is- when I go to set spark.driver.maxResultSize = 20g in notebook only , it is not taking while executing the job even when I try to get the spark.driver.maxResultSize value in notebook I am getting 20g.&lt;/P&gt;
&lt;P&gt;Still need clarification why does it behave like this?&lt;/P&gt; 
&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 23 Aug 2021 14:51:54 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/16102#M10315</guid>
      <dc:creator>sachinmkp1</dc:creator>
      <dc:date>2021-08-23T14:51:54Z</dc:date>
    </item>
    <item>
      <title>Re: org.apache.spark.SparkException: Job aborted due to stage failure: Total size of serialized results of 69 tasks (4.0 GB) is bigger than spark.driver.maxResultSize (4.0 GB)</title>
      <link>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/16103#M10316</link>
      <description>&lt;P&gt;Hi @sachinmkp1@gmail.com&amp;nbsp;,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;You need to add this Spark configuration at your cluster level, not at the notebook level. When you add it to the cluster level it will apply the settings properly. For more details on this issue, please check our knowledge base article &lt;A href="https://kb.databricks.com/jobs/job-fails-maxresultsize-exception.html" alt="https://kb.databricks.com/jobs/job-fails-maxresultsize-exception.html" target="_blank"&gt;https://kb.databricks.com/jobs/job-fails-maxresultsize-exception.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thank you.&lt;/P&gt;</description>
      <pubDate>Thu, 16 Sep 2021 17:24:14 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/org-apache-spark-sparkexception-job-aborted-due-to-stage-failure/m-p/16103#M10316</guid>
      <dc:creator>jose_gonzalez</dc:creator>
      <dc:date>2021-09-16T17:24:14Z</dc:date>
    </item>
  </channel>
</rss>

