<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Determining spill from system tables in Administration &amp; Architecture</title>
    <link>https://community.databricks.com/t5/administration-architecture/determining-spill-from-system-tables/m-p/104165#M2673</link>
    <description>&lt;P&gt;I'm trying to optimize machine selection (D, E, or L types on Azure) for job clusters and all-purpose compute and am struggling to identify where performance is sagging on account of disk spill.&amp;nbsp; Disk spill would suggest that more memory is needed.&amp;nbsp; I can get there from the Spark UI but am looking for historical diagnostics.&lt;/P&gt;&lt;P&gt;As of January 2025, system.compute.node_timeline is telling me useful things but not spill, explicitly.&lt;/P&gt;&lt;P&gt;&lt;A href="https://docs.databricks.com/en/admin/system-tables/compute.html#node-timeline-table-schema" target="_blank" rel="noopener"&gt;https://docs.databricks.com/en/admin/system-tables/compute.html#node-timeline-table-schema&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Help appreciated.&lt;/P&gt;</description>
    <pubDate>Fri, 03 Jan 2025 23:37:40 GMT</pubDate>
    <dc:creator>drumcircle</dc:creator>
    <dc:date>2025-01-03T23:37:40Z</dc:date>
    <item>
      <title>Determining spill from system tables</title>
      <link>https://community.databricks.com/t5/administration-architecture/determining-spill-from-system-tables/m-p/104165#M2673</link>
      <description>&lt;P&gt;I'm trying to optimize machine selection (D, E, or L types on Azure) for job clusters and all-purpose compute and am struggling to identify where performance is sagging on account of disk spill.&amp;nbsp; Disk spill would suggest that more memory is needed.&amp;nbsp; I can get there from the Spark UI but am looking for historical diagnostics.&lt;/P&gt;&lt;P&gt;As of January 2025, system.compute.node_timeline is telling me useful things but not spill, explicitly.&lt;/P&gt;&lt;P&gt;&lt;A href="https://docs.databricks.com/en/admin/system-tables/compute.html#node-timeline-table-schema" target="_blank" rel="noopener"&gt;https://docs.databricks.com/en/admin/system-tables/compute.html#node-timeline-table-schema&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Help appreciated.&lt;/P&gt;</description>
      <pubDate>Fri, 03 Jan 2025 23:37:40 GMT</pubDate>
      <guid>https://community.databricks.com/t5/administration-architecture/determining-spill-from-system-tables/m-p/104165#M2673</guid>
      <dc:creator>drumcircle</dc:creator>
      <dc:date>2025-01-03T23:37:40Z</dc:date>
    </item>
    <item>
      <title>Re: Determining spill from system tables</title>
      <link>https://community.databricks.com/t5/administration-architecture/determining-spill-from-system-tables/m-p/104167#M2674</link>
      <description>&lt;P&gt;For historical diagnostics, you might need to consider setting up a custom logging mechanism that captures these metrics over time and stores them in a persistent storage solution, such as a database or a logging service. This way, you can query and analyze historical performance data, including disk spill, at any point in the future.&lt;/P&gt;</description>
      <pubDate>Sat, 04 Jan 2025 00:42:54 GMT</pubDate>
      <guid>https://community.databricks.com/t5/administration-architecture/determining-spill-from-system-tables/m-p/104167#M2674</guid>
      <dc:creator>Walter_C</dc:creator>
      <dc:date>2025-01-04T00:42:54Z</dc:date>
    </item>
  </channel>
</rss>

