<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: spark.sql makes debugger freeze in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/spark-sql-makes-debugger-freeze/m-p/117293#M45469</link>
    <description>&lt;P&gt;Ensure that your Databricks Connect is properly set up and is using the correct version compatible with your cluster’s runtime. For VS Code, any mismatches between the installed &lt;CODE&gt;databricks-connect&lt;/CODE&gt; Python package version and the cluster runtime could lead to freezes or errors.&lt;/P&gt;
&lt;P&gt;Also, add detailed logging in your code to help identify where the freeze might be happening.&amp;nbsp;You can add logs around the &lt;CODE&gt;spark.sql&lt;/CODE&gt; operations to monitor the query execution phases and catch errors, if any.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Thu, 01 May 2025 05:15:39 GMT</pubDate>
    <dc:creator>NandiniN</dc:creator>
    <dc:date>2025-05-01T05:15:39Z</dc:date>
    <item>
      <title>spark.sql makes debugger freeze</title>
      <link>https://community.databricks.com/t5/data-engineering/spark-sql-makes-debugger-freeze/m-p/110179#M43512</link>
      <description>&lt;P&gt;&lt;SPAN&gt;I have just created a simple bundle with Databricks, and am using Databricks Connect to debug locally. This is my script:&lt;/SPAN&gt;&lt;/P&gt;&lt;PRE&gt;&lt;SPAN class=""&gt;from&lt;/SPAN&gt; pyspark.sql &lt;SPAN class=""&gt;import&lt;/SPAN&gt; SparkSession, DataFrame

&lt;SPAN class=""&gt;def&lt;/SPAN&gt; &lt;SPAN class=""&gt;get_taxis&lt;/SPAN&gt;(&lt;SPAN class=""&gt;spark: SparkSession&lt;/SPAN&gt;) -&amp;gt; DataFrame:
  &lt;SPAN class=""&gt;return&lt;/SPAN&gt; spark.read.table(&lt;SPAN class=""&gt;"samples.nyctaxi.trips"&lt;/SPAN&gt;)


&lt;SPAN class=""&gt;# Create a new Databricks Connect session. If this fails,&lt;/SPAN&gt;
&lt;SPAN class=""&gt;# check that you have configured Databricks Connect correctly.&lt;/SPAN&gt;
&lt;SPAN class=""&gt;# See https://docs.databricks.com/dev-tools/databricks-connect.html.&lt;/SPAN&gt;
&lt;SPAN class=""&gt;def&lt;/SPAN&gt; &lt;SPAN class=""&gt;get_spark&lt;/SPAN&gt;() -&amp;gt; SparkSession:
  &lt;SPAN class=""&gt;try&lt;/SPAN&gt;:
    &lt;SPAN class=""&gt;from&lt;/SPAN&gt; databricks.connect &lt;SPAN class=""&gt;import&lt;/SPAN&gt; DatabricksSession
    &lt;SPAN class=""&gt;return&lt;/SPAN&gt; DatabricksSession.builder.getOrCreate()
  &lt;SPAN class=""&gt;except&lt;/SPAN&gt; ImportError:
    &lt;SPAN class=""&gt;return&lt;/SPAN&gt; SparkSession.builder.getOrCreate()

&lt;SPAN class=""&gt;def&lt;/SPAN&gt; &lt;SPAN class=""&gt;test_connection&lt;/SPAN&gt;():
    &lt;SPAN class=""&gt;try&lt;/SPAN&gt;:
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;"Attempting to create Spark session..."&lt;/SPAN&gt;)
        spark = get_spark()
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;"Successfully created Spark session"&lt;/SPAN&gt;)
        
        &lt;SPAN class=""&gt;# Test with a simple query first&lt;/SPAN&gt;
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;"Testing with a simple query..."&lt;/SPAN&gt;)
        test_query = &lt;SPAN class=""&gt;"SELECT 1 as test"&lt;/SPAN&gt;
        test_df = spark.sql(test_query)
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;"Simple query successful"&lt;/SPAN&gt;)
        
        &lt;SPAN class=""&gt;# If simple query works, try listing tables&lt;/SPAN&gt;
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;"Attempting to list tables..."&lt;/SPAN&gt;)
        spark.sql(&lt;SPAN class=""&gt;"SHOW DATABASES"&lt;/SPAN&gt;).show()
        
        &lt;SPAN class=""&gt;return&lt;/SPAN&gt; spark
        
    &lt;SPAN class=""&gt;except&lt;/SPAN&gt; Exception &lt;SPAN class=""&gt;as&lt;/SPAN&gt; e:
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;f"Error type: &lt;SPAN class=""&gt;{&lt;SPAN class=""&gt;type&lt;/SPAN&gt;(e).__name__}&lt;/SPAN&gt;"&lt;/SPAN&gt;)
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;f"Error message: &lt;SPAN class=""&gt;{&lt;SPAN class=""&gt;str&lt;/SPAN&gt;(e)}&lt;/SPAN&gt;"&lt;/SPAN&gt;)
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;f"Error location: &lt;SPAN class=""&gt;{e.__traceback__.tb_frame.f_code.co_filename}&lt;/SPAN&gt;:&lt;SPAN class=""&gt;{e.__traceback__.tb_lineno}&lt;/SPAN&gt;"&lt;/SPAN&gt;)
        &lt;SPAN class=""&gt;raise&lt;/SPAN&gt;

&lt;SPAN class=""&gt;def&lt;/SPAN&gt; &lt;SPAN class=""&gt;main&lt;/SPAN&gt;():
    &lt;SPAN class=""&gt;try&lt;/SPAN&gt;:
        &lt;SPAN class=""&gt;# First test the connection&lt;/SPAN&gt;
        spark = test_connection()
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;"Connection test completed successfully"&lt;/SPAN&gt;)
        
        &lt;SPAN class=""&gt;# If connection works, proceed with the original code&lt;/SPAN&gt;
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;"Proceeding with main query..."&lt;/SPAN&gt;)
        
        &lt;SPAN class=""&gt;# Define your SQL query&lt;/SPAN&gt;
        sql_query = &lt;SPAN class=""&gt;"""
        select * from supermarket_dev.streaming_bronze.source_setting where source_application = 'iban'
        """&lt;/SPAN&gt;
        
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;f"Executing query: &lt;SPAN class=""&gt;{sql_query}&lt;/SPAN&gt;"&lt;/SPAN&gt;)
        &lt;SPAN class=""&gt;# Execute the SQL query and convert the results into a DataFrame&lt;/SPAN&gt;
        df = spark.sql(sql_query)
        
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;"Query executed successfully"&lt;/SPAN&gt;)
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;f"DataFrame is empty: &lt;SPAN class=""&gt;{df.isEmpty()}&lt;/SPAN&gt;"&lt;/SPAN&gt;)
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;f"DataFrame schema: &lt;SPAN class=""&gt;{df.schema}&lt;/SPAN&gt;"&lt;/SPAN&gt;)
        
        &lt;SPAN class=""&gt;# Show the DataFrame contents&lt;/SPAN&gt;
        first = df.first()
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;f"First row: &lt;SPAN class=""&gt;{first}&lt;/SPAN&gt;"&lt;/SPAN&gt;)

    &lt;SPAN class=""&gt;except&lt;/SPAN&gt; Exception &lt;SPAN class=""&gt;as&lt;/SPAN&gt; e:
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;f"Error type: &lt;SPAN class=""&gt;{&lt;SPAN class=""&gt;type&lt;/SPAN&gt;(e).__name__}&lt;/SPAN&gt;"&lt;/SPAN&gt;)
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;f"Error message: &lt;SPAN class=""&gt;{&lt;SPAN class=""&gt;str&lt;/SPAN&gt;(e)}&lt;/SPAN&gt;"&lt;/SPAN&gt;)
        &lt;SPAN class=""&gt;print&lt;/SPAN&gt;(&lt;SPAN class=""&gt;f"Error location: &lt;SPAN class=""&gt;{e.__traceback__.tb_frame.f_code.co_filename}&lt;/SPAN&gt;:&lt;SPAN class=""&gt;{e.__traceback__.tb_lineno}&lt;/SPAN&gt;"&lt;/SPAN&gt;)
        &lt;SPAN class=""&gt;raise&lt;/SPAN&gt;

&lt;SPAN class=""&gt;if&lt;/SPAN&gt; __name__ == &lt;SPAN class=""&gt;'__main__'&lt;/SPAN&gt;:
  main()&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;Every time I call spark.sql, the debugger freezes and VS Code just stands like this:&lt;/SPAN&gt;&lt;/PRE&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Sega2_0-1739520074229.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/14874i417AF0CF65FF5BDE/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Sega2_0-1739520074229.png" alt="Sega2_0-1739520074229.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;If I deploy it then I can see it runs through successfully:&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Sega2_1-1739520103137.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/14875i700BBD959B8139CD/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Sega2_1-1739520103137.png" alt="Sega2_1-1739520103137.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;Any pointers on what to do or what could cause this?&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 14 Feb 2025 08:05:23 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/spark-sql-makes-debugger-freeze/m-p/110179#M43512</guid>
      <dc:creator>Sega2</dc:creator>
      <dc:date>2025-02-14T08:05:23Z</dc:date>
    </item>
    <item>
      <title>Re: spark.sql makes debugger freeze</title>
      <link>https://community.databricks.com/t5/data-engineering/spark-sql-makes-debugger-freeze/m-p/117293#M45469</link>
      <description>&lt;P&gt;Ensure that your Databricks Connect is properly set up and is using the correct version compatible with your cluster’s runtime. For VS Code, any mismatches between the installed &lt;CODE&gt;databricks-connect&lt;/CODE&gt; Python package version and the cluster runtime could lead to freezes or errors.&lt;/P&gt;
&lt;P&gt;Also, add detailed logging in your code to help identify where the freeze might be happening.&amp;nbsp;You can add logs around the &lt;CODE&gt;spark.sql&lt;/CODE&gt; operations to monitor the query execution phases and catch errors, if any.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 01 May 2025 05:15:39 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/spark-sql-makes-debugger-freeze/m-p/117293#M45469</guid>
      <dc:creator>NandiniN</dc:creator>
      <dc:date>2025-05-01T05:15:39Z</dc:date>
    </item>
  </channel>
</rss>

