<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Troubleshooting Spill in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/troubleshooting-spill/m-p/44142#M27609</link>
    <description>&lt;P&gt;You can resolve the Spill to memory by increasing the shuffle partitions, but 16 GB of spill memory should not create a major impact on your job execution. Could you share more details on the actual source code that you are running?&lt;/P&gt;</description>
    <pubDate>Fri, 08 Sep 2023 22:46:17 GMT</pubDate>
    <dc:creator>jose_gonzalez</dc:creator>
    <dc:date>2023-09-08T22:46:17Z</dc:date>
    <item>
      <title>Troubleshooting Spill</title>
      <link>https://community.databricks.com/t5/data-engineering/troubleshooting-spill/m-p/43909#M27580</link>
      <description>&lt;P&gt;I am trying to troubleshoot why spill occurred during DeltaOptimizeWrite. I am running a 64-core cluster with 256 GB RAM, which I expect to be able to handle this amount of data (see attached DAG).&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-left" image-alt="IMG_1085.jpeg" style="width: 155px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/3536iAF77B5E21475C508/image-size/small/is-moderation-mode/true?v=v2&amp;amp;px=200" role="button" title="IMG_1085.jpeg" alt="IMG_1085.jpeg" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 07 Sep 2023 06:16:27 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/troubleshooting-spill/m-p/43909#M27580</guid>
      <dc:creator>lawrence009</dc:creator>
      <dc:date>2023-09-07T06:16:27Z</dc:date>
    </item>
    <item>
      <title>Re: Troubleshooting Spill</title>
      <link>https://community.databricks.com/t5/data-engineering/troubleshooting-spill/m-p/43910#M27581</link>
      <description>&lt;P&gt;Data Skewness: Some tasks might be processing more data than others. Incorrect Resource Allocation: Ensure that Spark configurations (like spark.executor.memory, spark.core etc.) are set appropriately. Complex Computations: The operations in the DAG might be too complex, causing excessive memory usage.&lt;/P&gt;</description>
      <pubDate>Thu, 07 Sep 2023 06:17:40 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/troubleshooting-spill/m-p/43910#M27581</guid>
      <dc:creator>Finleycartwrigh</dc:creator>
      <dc:date>2023-09-07T06:17:40Z</dc:date>
    </item>
    <item>
      <title>Re: Troubleshooting Spill</title>
      <link>https://community.databricks.com/t5/data-engineering/troubleshooting-spill/m-p/43939#M27585</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/7326"&gt;@lawrence009&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;You can also take a look at the individual task level metrics. This should help in understanding whether there was skew involved during the processing. We can also get a better understanding of the spill by viewing the Task Level Summary. We record aggregated information at min, 25th, 50th, 75th and max percentiles.&lt;/P&gt;</description>
      <pubDate>Thu, 07 Sep 2023 09:42:00 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/troubleshooting-spill/m-p/43939#M27585</guid>
      <dc:creator>Tharun-Kumar</dc:creator>
      <dc:date>2023-09-07T09:42:00Z</dc:date>
    </item>
    <item>
      <title>Re: Troubleshooting Spill</title>
      <link>https://community.databricks.com/t5/data-engineering/troubleshooting-spill/m-p/44142#M27609</link>
      <description>&lt;P&gt;You can resolve the Spill to memory by increasing the shuffle partitions, but 16 GB of spill memory should not create a major impact on your job execution. Could you share more details on the actual source code that you are running?&lt;/P&gt;</description>
      <pubDate>Fri, 08 Sep 2023 22:46:17 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/troubleshooting-spill/m-p/44142#M27609</guid>
      <dc:creator>jose_gonzalez</dc:creator>
      <dc:date>2023-09-08T22:46:17Z</dc:date>
    </item>
  </channel>
</rss>

