When trying to set up databricks-connect on WSL2 with a 13.3 cluster, I receive the following OpenSSL CERTIFICATE_VERIFY_FAILED error.
Authentication is done via the SPARK_REMOTE environment variable.
E0415 11:24:26.646129568 142172 ssl_transport_security.cc:1519] Handshake failed with fatal error SSL_ERROR_SSL: error:1000007d:SSL routines:OPENSSL_internal:CERTIFICATE_VERIFY_FAILED.
Traceback (most recent call last):
File "/home/vpacik/Codes/spark-test.py", line 5, in <module>
spark.range(10).show()
File "/home/vpacik/Codes/.venv/lib/python3.10/site-packages/pyspark/sql/connect/dataframe.py", line 996, in show
print(self._show_string(n, truncate, vertical))
File "/home/vpacik/Codes/.venv/lib/python3.10/site-packages/pyspark/sql/connect/dataframe.py", line 753, in _show_string
).toPandas()
File "/home/vpacik/Codes/.venv/lib/python3.10/site-packages/pyspark/sql/connect/dataframe.py", line 1655, in toPandas
return self._session.client.to_pandas(query)
File "/home/vpacik/Codes/.venv/lib/python3.10/site-packages/pyspark/sql/connect/client/core.py", line 798, in to_pandas
table, schema, metrics, observed_metrics, _ = self._execute_and_fetch(req)
File "/home/vpacik/Codes/.venv/lib/python3.10/site-packages/pyspark/sql/connect/client/core.py", line 1172, in _execute_and_fetch
for response in self._execute_and_fetch_as_iterator(req):
File "/home/vpacik/Codes/.venv/lib/python3.10/site-packages/pyspark/sql/connect/client/core.py", line 1153, in _execute_and_fetch_as_iterator
self._handle_error(error)
File "/home/vpacik/Codes/.venv/lib/python3.10/site-packages/pyspark/sql/connect/client/core.py", line 1308, in _handle_error
self._handle_rpc_error(error)
File "/home/vpacik/Codes/.venv/lib/python3.10/site-packages/pyspark/sql/connect/client/core.py", line 1348, in _handle_rpc_error
raise SparkConnectGrpcException(str(rpc_error)) from None
pyspark.errors.exceptions.connect.SparkConnectGrpcException: <_MultiThreadedRendezvous of RPC that terminated with:
status = StatusCode.UNAVAILABLE
details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:20.42.4.211:443: Ssl handshake failed: SSL_ERROR_SSL: error:1000007d:SSL routines:OPENSSL_internal:CERTIFICATE_VERIFY_FAILED"
debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-04-15T11:24:26.646729568+02:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:20.42.4.211:443: Ssl handshake failed: SSL_ERROR_SSL: error:1000007d:SSL routines:OPENSSL_internal:CERTIFICATE_VERIFY_FAILED"}"
The script used:
from databricks.connect import DatabricksSession
spark = DatabricksSession.builder.getOrCreate()
spark.range(10).show()