We have been using Databricks Runtime 14.2 with shared access mode for our computing cluster for quite some time. We are now trying to upgrade to Python 3.11 for dependency management, which requires runtime 15.1/15.2, since runtime 14.2 only supports Python 3.10.
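As a quick sanity check of which interpreter each runtime actually provides, we run a cell like the following (just a sketch of the version check, nothing specific to our dependencies):

import sys

# DBR 14.2 provides Python 3.10; DBR 15.1/15.2 provide Python 3.11.
print(sys.version)
assert sys.version_info[:2] >= (3, 11), "this runtime does not provide Python 3.11"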
We can use runtime 15.1 in single user mode without any problems; however, in shared mode we cannot run our notebooks. Even a simple statement like print() keeps failing with the error
"Failure starting repl. Try detaching and re-attaching the notebook."
The error does not go away after detaching and re-attaching the notebook.
The behavior is the same on runtime 15.1 and 15.2 Beta. The issue is seen only in shared mode, and we must use shared mode for our use case. We even tried a customized Docker image to get Python 3.11 on runtime 14.2, but that feature is also not supported in shared mode.
The cluster driver logs show the following error:
0.00s - Debugger warning: It seems that frozen modules are being used, which may
0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.
Tue May 7 10:10:53 2024 Connection to spark from PID 2943
Tue May 7 10:10:53 2024 Initialized gateway on port 35681
Traceback (most recent call last):
  File "/databricks/python_shell/scripts/db_ipykernel_launcher.py", line 151, in <module>
    main()
  File "/databricks/python_shell/scripts/db_ipykernel_launcher.py", line 67, in main
    user_namespace_initializer = UserNamespaceInitializer.getOrCreate()
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/databricks/python_shell/dbruntime/UserNamespaceInitializer.py", line 100, in getOrCreate
    sparkHandles, spark_entry_point = initialize_spark_connection(is_pinn_mode_enabled())
                                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/databricks/python_shell/dbruntime/spark_connection.py", line 204, in initialize_spark_connection
    sparkSession = get_and_configure_uds_spark(conf) \
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/databricks/python_shell/dbruntime/spark_connection.py", line 236, in get_and_configure_uds_spark
    spark = RemoteSparkSession.builder.channelBuilder(uds_channel_builder).getOrCreate()
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/databricks/spark/python/pyspark/sql/connect/session.py", line 246, in getOrCreate
    session = self.create()
              ^^^^^^^^^^^^^
  File "/databricks/spark/python/pyspark/sql/connect/session.py", line 230, in create
    session = SparkSession(connection=self._channel_builder)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/databricks/spark/python/pyspark/sql/connect/session.py", line 273, in __init__
    self._init_client = SparkConnectClient(connection=connection, user_id=userId)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/databricks/spark/python/pyspark/sql/connect/client/core.py", line 703, in __init__
    _safe_enable_large_protobufs_if_available()
  File "/databricks/spark/python/pyspark/sql/connect/client/core.py", line 115, in _safe_enable_large_protobufs_if_available
    api_implementation._c_module.SetAllowOversizeProtos(True)
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: module 'google.protobuf.internal.api_implementation' has no attribute '_c_module'
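The last frame suggests the protobuf package the REPL picks up does not expose the C extension module that SetAllowOversizeProtos lives in. On a cluster where the REPL does start (e.g. single user mode on 15.1), the active protobuf build can be inspected with a short diagnostic cell like this (a sketch, not part of our job code; _c_module is an internal attribute and may legitimately be absent on some protobuf versions):

import google.protobuf
from google.protobuf.internal import api_implementation

# Which protobuf wheel and which backend ('upb', 'cpp', or 'python') are active?
print("protobuf version:", google.protobuf.__version__)
print("implementation:", api_implementation.Type())

# The attribute the launcher dereferences; None or missing here corresponds
# to the AttributeError in the driver log above.
print("_c_module:", getattr(api_implementation, "_c_module", None))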
The notebook cell shows the following error:
Failure starting repl. Try detaching and re-attaching the notebook.
at com.databricks.spark.chauffeur.ExecContextState.processInternalMessage(ExecContextState.scala:331)
at com.databricks.spark.chauffeur.ChauffeurState.processDriverResponse(ChauffeurState.scala:552)
at com.databricks.spark.chauffeur.Chauffeur$$anon$2$$anonfun$receive$2.handleDriverBackendResponse$1(Chauffeur.scala:1198)
at com.databricks.spark.chauffeur.Chauffeur$$anon$2$$anonfun$receive$2.$anonfun$applyOrElse$34(Chauffeur.scala:1228)
at com.databricks.logging.UsageLogging.executeThunkAndCaptureResultTags$1(UsageLogging.scala:629)
at com.databricks.logging.UsageLogging.$anonfun$recordOperationWithResultTags$4(UsageLogging.scala:647)
at com.databricks.logging.AttributionContextTracing.$anonfun$withAttributionContext$1(AttributionContextTracing.scala:48)
at com.databricks.logging.AttributionContext$.$anonfun$withValue$1(AttributionContext.scala:244)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:240)
at com.databricks.logging.AttributionContextTracing.withAttributionContext(AttributionContextTracing.scala:46)
at com.databricks.logging.AttributionContextTracing.withAttributionContext$(AttributionContextTracing.scala:43)
at com.databricks.rpc.ServerBackend.withAttributionContext(ServerBackend.scala:22)
at com.databricks.logging.AttributionContextTracing.withAttributionTags(AttributionContextTracing.scala:95)
at com.databricks.logging.AttributionContextTracing.withAttributionTags$(AttributionContextTracing.scala:76)
at com.databricks.rpc.ServerBackend.withAttributionTags(ServerBackend.scala:22)
at com.databricks.logging.UsageLogging.recordOperationWithResultTags(UsageLogging.scala:624)
at com.databricks.logging.UsageLogging.recordOperationWithResultTags$(UsageLogging.scala:534)
at com.databricks.rpc.ServerBackend.recordOperationWithResultTags(ServerBackend.scala:22)
at com.databricks.spark.chauffeur.Chauffeur$$anon$2$$anonfun$receive$2.handleDriverBackendResponseWithUsageLogging$1(Chauffeur.scala:1227)
at com.databricks.spark.chauffeur.Chauffeur$$anon$2$$anonfun$receive$2.applyOrElse(Chauffeur.scala:1250)
at com.databricks.spark.chauffeur.Chauffeur$$anon$2$$anonfun$receive$2.applyOrElse(Chauffeur.scala:1161)
at com.databricks.rpc.ServerBackend.$anonfun$internalReceive0$2(ServerBackend.scala:174)
at com.databricks.rpc.ServerBackend$$anonfun$commonReceive$1.applyOrElse(ServerBackend.scala:200)
at com.databricks.rpc.ServerBackend$$anonfun$commonReceive$1.applyOrElse(ServerBackend.scala:200)
at com.databricks.rpc.ServerBackend.internalReceive0(ServerBackend.scala:171)
at com.databricks.rpc.ServerBackend.$anonfun$internalReceive$1(ServerBackend.scala:147)
at com.databricks.logging.UsageLogging.$anonfun$recordOperation$1(UsageLogging.scala:525)
at com.databricks.logging.UsageLogging.executeThunkAndCaptureResultTags$1(UsageLogging.scala:629)
at com.databricks.logging.UsageLogging.$anonfun$recordOperationWithResultTags$4(UsageLogging.scala:647)
at com.databricks.logging.AttributionContextTracing.$anonfun$withAttributionContext$1(AttributionContextTracing.scala:48)
at com.databricks.logging.AttributionContext$.$anonfun$withValue$1(AttributionContext.scala:244)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:240)
at com.databricks.logging.AttributionContextTracing.withAttributionContext(AttributionContextTracing.scala:46)
at com.databricks.logging.AttributionContextTracing.withAttributionContext$(AttributionContextTracing.scala:43)
at com.databricks.rpc.ServerBackend.withAttributionContext(ServerBackend.scala:22)
at com.databricks.logging.AttributionContextTracing.withAttributionTags(AttributionContextTracing.scala:95)
at com.databricks.logging.AttributionContextTracing.withAttributionTags$(AttributionContextTracing.scala:76)
at com.databricks.rpc.ServerBackend.withAttributionTags(ServerBackend.scala:22)
at com.databricks.logging.UsageLogging.recordOperationWithResultTags(UsageLogging.scala:624)
at com.databricks.logging.UsageLogging.recordOperationWithResultTags$(UsageLogging.scala:534)
at com.databricks.rpc.ServerBackend.recordOperationWithResultTags(ServerBackend.scala:22)
at com.databricks.logging.UsageLogging.recordOperation(UsageLogging.scala:526)
at com.databricks.logging.UsageLogging.recordOperation$(UsageLogging.scala:494)
at com.databricks.rpc.ServerBackend.recordOperation(ServerBackend.scala:22)
at com.databricks.rpc.ServerBackend.internalReceive(ServerBackend.scala:146)
at com.databricks.rpc.JettyServer$RequestManager.handleRPC(JettyServer.scala:1021)
at com.databricks.rpc.JettyServer$RequestManager.handleRequestAndRespond(JettyServer.scala:942)
at com.databricks.rpc.JettyServer$RequestManager.$anonfun$handleHttp$6(JettyServer.scala:546)
at com.databricks.rpc.JettyServer$RequestManager.$anonfun$handleHttp$6$adapted(JettyServer.scala:515)
at com.databricks.logging.activity.ActivityContextFactory$.$anonfun$withActivityInternal$6(ActivityContextFactory.scala:546)
at com.databricks.logging.AttributionContextTracing.$anonfun$withAttributionContext$1(AttributionContextTracing.scala:48)
at com.databricks.logging.AttributionContext$.$anonfun$withValue$1(AttributionContext.scala:244)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:240)
at com.databricks.logging.AttributionContextTracing.withAttributionContext(AttributionContextTracing.scala:46)
at com.databricks.logging.AttributionContextTracing.withAttributionContext$(AttributionContextTracing.scala:43)
at com.databricks.logging.activity.ActivityContextFactory$.withAttributionContext(ActivityContextFactory.scala:57)
at com.databricks.logging.activity.ActivityContextFactory$.$anonfun$withActivityInternal$3(ActivityContextFactory.scala:546)
at com.databricks.context.integrity.IntegrityCheckContext$ThreadLocalStorage$.withValue(IntegrityCheckContext.scala:72)
at com.databricks.logging.activity.ActivityContextFactory$.withActivityInternal(ActivityContextFactory.scala:524)
at com.databricks.logging.activity.ActivityContextFactory$.withServiceRequestActivity(ActivityContextFactory.scala:178)
at com.databricks.rpc.JettyServer$RequestManager.handleHttp(JettyServer.scala:515)
at com.databricks.rpc.JettyServer$RequestManager.doPost(JettyServer.scala:405)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:665)
at com.databricks.rpc.HttpServletWithPatch.service(HttpServletWithPatch.scala:33)
at javax.servlet.http.HttpServlet.service(HttpServlet.java:750)
at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:799)
at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:554)
at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:190)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:505)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)
at org.eclipse.jetty.server.Server.handle(Server.java:516)
at org.eclipse.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:487)
at org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:732)
at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:479)
at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:277)
at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311)
at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:105)
at org.eclipse.jetty.io.ChannelEndPoint$1.run(ChannelEndPoint.java:104)
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:338)
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:315)
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173)
at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:409)
at com.databricks.rpc.InstrumentedQueuedThreadPool$$anon$1.$anonfun$run$2(InstrumentedQueuedThreadPool.scala:106)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.logging.AttributionContextTracing.$anonfun$withAttributionContext$1(AttributionContextTracing.scala:48)
at com.databricks.logging.AttributionContext$.$anonfun$withValue$1(AttributionContext.scala:244)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:240)
at com.databricks.logging.AttributionContextTracing.withAttributionContext(AttributionContextTracing.scala:46)
at com.databricks.logging.AttributionContextTracing.withAttributionContext$(AttributionContextTracing.scala:43)
at com.databricks.rpc.InstrumentedQueuedThreadPool.withAttributionContext(InstrumentedQueuedThreadPool.scala:46)
at com.databricks.rpc.InstrumentedQueuedThreadPool$$anon$1.$anonfun$run$1(InstrumentedQueuedThreadPool.scala:106)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.instrumentation.QueuedThreadPoolInstrumenter.trackActiveThreads(QueuedThreadPoolInstrumenter.scala:150)
at com.databricks.instrumentation.QueuedThreadPoolInstrumenter.trackActiveThreads$(QueuedThreadPoolInstrumenter.scala:147)
at com.databricks.rpc.InstrumentedQueuedThreadPool.trackActiveThreads(InstrumentedQueuedThreadPool.scala:46)
at com.databricks.rpc.InstrumentedQueuedThreadPool$$anon$1.run(InstrumentedQueuedThreadPool.scala:88)
at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883)
at org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034)
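For anyone trying to reproduce this: the traceback suggests the REPL is importing a protobuf build without the expected C module, so one check worth doing is listing the protobuf distributions visible to the driver. This is a diagnostic sketch; the version that DBR 15.1 ships should be cross-checked against its release notes:

import importlib.metadata as md

# Print every installed distribution whose name mentions protobuf; more than
# one entry, or a version differing from the runtime default, would point at
# a cluster library shadowing the runtime's protobuf.
for dist in md.distributions():
    name = dist.metadata["Name"] or ""
    if "protobuf" in name.lower():
        print(name, dist.version)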