How do I pass proxy configuration to the databricks-sql-connector Python package?

venkad
Contributor

Hi,

I am trying to connect, using databricks-sql-connector, to a Databricks workspace that has IP access restrictions enabled. Only my proxy server's IPs are on the allow list.

from databricks import sql
 
connection = sql.connect(
  server_hostname ='adb-random12094383.azuredatabricks.net', 
  http_path       ='/sql/1.0/endpoints/randomid', 
  access_token='<access token here>',
)
with connection.cursor() as cur:
  cur.execute('show schemas')
  print(cur.fetchall())

This fails with the error below:

---------------------------------------------------------------------------
RequestError                              Traceback (most recent call last)
Input In [2], in <cell line: 1>()
----> 1 connection = sql.connect(
      2   server_hostname ='adb-2961643968016177.17.azuredatabricks.net', 
      3   http_path       ='/sql/1.0/endpoints/17a021e95a9e6f25', 
      4   access_token='dapi3d97e76ce3de34fb4ea060cf6b068e0a',
      5   http_headers= [("https_proxy", "http://proxy-dmz.intel.com:912"), ("http_proxy", "http://proxy-dmz.intel.com:912")]
      6   )
 
File ~/project/python/venv/lib/python3.8/site-packages/databricks/sql/__init__.py:50, in connect(server_hostname, http_path, access_token, **kwargs)
     47 def connect(server_hostname, http_path, access_token, **kwargs):
     48     from .client import Connection
---> 50     return Connection(server_hostname, http_path, access_token, **kwargs)
 
File ~/project/python/venv/lib/python3.8/site-packages/databricks/sql/client.py:129, in Connection.__init__(self, server_hostname, http_path, access_token, http_headers, session_configuration, catalog, schema, **kwargs)
    120 base_headers = [("User-Agent", useragent_header)] + authorization_header
    121 self.thrift_backend = ThriftBackend(
    122     self.host,
    123     self.port,
   (...)
    126     **kwargs
    127 )
--> 129 self._session_handle = self.thrift_backend.open_session(
    130     session_configuration, catalog, schema
    131 )
    132 self.open = True
    133 logger.info("Successfully opened session " + str(self.get_session_id()))
 
File ~/project/python/venv/lib/python3.8/site-packages/databricks/sql/thrift_backend.py:444, in ThriftBackend.open_session(self, session_configuration, catalog, schema)
    435     initial_namespace = None
    437 open_session_req = ttypes.TOpenSessionReq(
    438     client_protocol_i64=ttypes.TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V5,
    439     client_protocol=None,
   (...)
    442     configuration=session_configuration,
    443 )
--> 444 response = self.make_request(self._client.OpenSession, open_session_req)
    445 self._check_initial_namespace(catalog, schema, response)
    446 self._check_protocol_version(response)
 
File ~/project/python/venv/lib/python3.8/site-packages/databricks/sql/thrift_backend.py:373, in ThriftBackend.make_request(self, method, request)
    371 error_info = response_or_error_info
    372 # The error handler will either sleep or throw an exception
--> 373 self._handle_request_error(error_info, attempt, elapsed)
 
File ~/project/python/venv/lib/python3.8/site-packages/databricks/sql/thrift_backend.py:241, in ThriftBackend._handle_request_error(self, error_info, attempt, elapsed)
    236     network_request_error = RequestError(
    237         user_friendly_error_message, full_error_info_context, error_info.error
    238     )
    239     logger.info(network_request_error.message_with_context())
--> 241     raise network_request_error
    243 logger.info(
    244     "Retrying request after error in {} seconds: {}".format(
    245         error_info.retry_delay, full_error_info_context
    246     )
    247 )
    248 time.sleep(error_info.retry_delay)
 
RequestError: Error during request to server

I also tried passing the proxy settings through the http_headers argument:

  connection = sql.connect(
  server_hostname ='adb-random12094383.azuredatabricks.net', 
  http_path       ='/sql/1.0/endpoints/randomid', 
  access_token='<access token here>',
  http_headers= [("https_proxy", "proxy.mydomain.com:912"), ("http_proxy", "proxy.mydomain.com:912")]
  )

It still fails with the same error.

How can I pass proxy settings to the Databricks SQL connector for Python?