Debugger freezes when calling spark.sql with dbx connect
02-21-2025 02:55 AM
I have just created a simple bundle with Databricks and am using Databricks Connect to debug locally. This is my script:
from pyspark.sql import SparkSession, DataFrame

def get_taxis(spark: SparkSession) -> DataFrame:
    return spark.read.table("samples.nyctaxi.trips")

# Create a new Databricks Connect session. If this fails,
# check that you have configured Databricks Connect correctly.
# See https://docs.databricks.com/dev-tools/databricks-connect.html.
def get_spark() -> SparkSession:
    try:
        from databricks.connect import DatabricksSession
        return DatabricksSession.builder.getOrCreate()
    except ImportError:
        return SparkSession.builder.getOrCreate()

def test_connection():
    try:
        print("Attempting to create Spark session...")
        spark = get_spark()
        print("Successfully created Spark session")

        # Test with a simple query first
        print("Testing with a simple query...")
        test_query = "SELECT 1 as test"
        test_df = spark.sql(test_query)
        print("Simple query successful")

        # If simple query works, try listing tables
        print("Attempting to list tables...")
        spark.sql("SHOW DATABASES").show()
        return spark
    except Exception as e:
        print(f"Error type: {type(e).__name__}")
        print(f"Error message: {str(e)}")
        print(f"Error location: {e.__traceback__.tb_frame.f_code.co_filename}:{e.__traceback__.tb_lineno}")
        raise

def main():
    try:
        # First test the connection
        spark = test_connection()
        print("Connection test completed successfully")

        # If connection works, proceed with the original code
        print("Proceeding with main query...")

        # Define your SQL query
        sql_query = """
            select * from supermarket_dev.streaming_bronze.source_setting where source_application = 'iban'
        """
        print(f"Executing query: {sql_query}")

        # Execute the SQL query and convert the results into a DataFrame
        df = spark.sql(sql_query)
        print("Query executed successfully")
        print(f"DataFrame is empty: {df.isEmpty()}")
        print(f"DataFrame schema: {df.schema}")

        # Show the DataFrame contents
        first = df.first()
        print(f"First row: {first}")
    except Exception as e:
        print(f"Error type: {type(e).__name__}")
        print(f"Error message: {str(e)}")
        print(f"Error location: {e.__traceback__.tb_frame.f_code.co_filename}:{e.__traceback__.tb_lineno}")
        raise

if __name__ == '__main__':
    main()
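For reference, get_spark() just relies on my default Databricks Connect configuration. A variant that pins the session to an explicit workspace and cluster would look roughly like this (host, token and cluster ID are placeholders, not my real values):

from databricks.connect import DatabricksSession

def get_spark_explicit():
    # Placeholder values; in practice these would come from a
    # .databrickscfg profile or environment variables, not hard-coded strings.
    return (
        DatabricksSession.builder
        .remote(
            host="https://<my-workspace>.cloud.databricks.com",
            token="<personal-access-token>",
            cluster_id="<cluster-id>",
        )
        .getOrCreate()
    )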
Every time I call spark.sql, the debugger freezes and VS Code just hangs at that call.
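One thing I can try is dumping the thread stacks while the call hangs, for example with Python's standard faulthandler module (the 30-second interval is arbitrary, and get_spark() is the function from my script above):

import faulthandler
import sys

# Periodically dump all thread stacks to stderr so I can see
# what spark.sql is blocked on while the debugger appears frozen.
faulthandler.dump_traceback_later(30, repeat=True, file=sys.stderr)

spark = get_spark()
spark.sql("SELECT 1 as test").show()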
If I deploy the bundle, I can see that it runs through successfully.
Any pointers on what to do or what could cause this?
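One thing I also want to rule out is version skew between the databricks-connect client and the cluster's Databricks Runtime, since as far as I understand the two need to be compatible. A small check (get_spark() is the function from the script above; importlib.metadata just reads the installed package version):

from importlib.metadata import version

# Client-side Databricks Connect package version
print("databricks-connect:", version("databricks-connect"))

# Spark version reported by the remote session
spark = get_spark()
print("remote Spark version:", spark.version)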