<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Pyspark operations slowness in CLuster 14.3LTS as compared to 13.3 LTS in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/pyspark-operations-slowness-in-cluster-14-3lts-as-compared-to-13/m-p/66629#M33174</link>
    <description>&lt;P&gt;Check the physical query plan for both DBR 14.3 and 13.3 to see whether the plans differ. If they do, then check the Spark UI to identify where it changed.&lt;/P&gt;</description>
    <pubDate>Thu, 18 Apr 2024 21:48:42 GMT</pubDate>
    <dc:creator>jose_gonzalez</dc:creator>
    <dc:date>2024-04-18T21:48:42Z</dc:date>
    <item>
      <title>Pyspark operations slowness in CLuster 14.3LTS as compared to 13.3 LTS</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-operations-slowness-in-cluster-14-3lts-as-compared-to-13/m-p/66220#M33056</link>
      <description>&lt;P&gt;In my notebook, I am performing a few join operations which take more than 30s on a 14.3 LTS cluster, whereas the same operations take less than 4s on a 13.3 LTS cluster. Can someone help me optimize PySpark operations like joins and withColumn?&lt;/P&gt;</description>
      <pubDate>Sun, 14 Apr 2024 20:37:29 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-operations-slowness-in-cluster-14-3lts-as-compared-to-13/m-p/66220#M33056</guid>
      <dc:creator>anish2102</dc:creator>
      <dc:date>2024-04-14T20:37:29Z</dc:date>
    </item>
    <item>
      <title>Re: Pyspark operations slowness in CLuster 14.3LTS as compared to 13.3 LTS</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-operations-slowness-in-cluster-14-3lts-as-compared-to-13/m-p/66629#M33174</link>
      <description>&lt;P&gt;Check the physical query plan for both DBR 14.3 and 13.3 to see whether the plans differ. If they do, then check the Spark UI to identify where it changed.&lt;/P&gt;</description>
      <pubDate>Thu, 18 Apr 2024 21:48:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-operations-slowness-in-cluster-14-3lts-as-compared-to-13/m-p/66629#M33174</guid>
      <dc:creator>jose_gonzalez</dc:creator>
      <dc:date>2024-04-18T21:48:42Z</dc:date>
    </item>
    <item>
      <title>Re: Pyspark operations slowness in CLuster 14.3LTS as compared to 13.3 LTS</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-operations-slowness-in-cluster-14-3lts-as-compared-to-13/m-p/67076#M33274</link>
      <description>&lt;P&gt;Are you comparing the performance against the same dataset?&lt;/P&gt;</description>
      <pubDate>Tue, 23 Apr 2024 14:05:14 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-operations-slowness-in-cluster-14-3lts-as-compared-to-13/m-p/67076#M33274</guid>
      <dc:creator>Lakshay</dc:creator>
      <dc:date>2024-04-23T14:05:14Z</dc:date>
    </item>
    <item>
      <title>Re: Pyspark operations slowness in CLuster 14.3LTS as compared to 13.3 LTS</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-operations-slowness-in-cluster-14-3lts-as-compared-to-13/m-p/67089#M33277</link>
      <description>&lt;P&gt;I have found the issue. It was actually in the code, where the dataframe was being referenced multiple times in withColumn and join operations in the form dataframe['col_name'], which was creating more than 20 Spark jobs and hence degrading the performance of the notebook. If I refer to the columns using the col() function in both the join and withColumn operations, then it runs fast compared to the previous version. Also, it creates only 1 or 2 Spark jobs.&lt;/P&gt;</description>
      <pubDate>Tue, 23 Apr 2024 15:10:43 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-operations-slowness-in-cluster-14-3lts-as-compared-to-13/m-p/67089#M33277</guid>
      <dc:creator>anish2102</dc:creator>
      <dc:date>2024-04-23T15:10:43Z</dc:date>
    </item>
    <item>
      <title>Re: Pyspark operations slowness in CLuster 14.3LTS as compared to 13.3 LTS</title>
      <link>https://community.databricks.com/t5/data-engineering/pyspark-operations-slowness-in-cluster-14-3lts-as-compared-to-13/m-p/67093#M33279</link>
      <description>&lt;P&gt;Thank you for sharing the analysis&lt;/P&gt;</description>
      <pubDate>Tue, 23 Apr 2024 15:42:23 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/pyspark-operations-slowness-in-cluster-14-3lts-as-compared-to-13/m-p/67093#M33279</guid>
      <dc:creator>Lakshay</dc:creator>
      <dc:date>2024-04-23T15:42:23Z</dc:date>
    </item>
  </channel>
</rss>

