Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.runJob.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1.0 (TID 7) (10.141.5.10 executor 0): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/databricks/spark/python/pyspark/serializers.py", line 192, in _read_with_length
return self.loads(obj)
^^^^^^^^^^^^^^^
File "/databricks/spark/python/pyspark/serializers.py", line 572, in loads
return cloudpickle.loads(obj, encoding=encoding)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ModuleNotFoundError: No module named 'dbruntime'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/databricks/spark/python/pyspark/worker.py", line 1980, in main
process()
File "/databricks/spark/python/pyspark/worker.py", line 1972, in process
serializer.dump_stream(out_iter, outfile)
File "/databricks/spark/python/pyspark/serializers.py", line 356, in dump_stream
vs = list(itertools.islice(iterator, batch))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/databricks/spark/python/pyspark/core/rdd.py", line 2757, in takeUpToNumLeft
yield next(iterator)
^^^^^^^^^^^^^^
File "/databricks/spark/python/pyspark/serializers.py", line 162, in load_stream
yield self._read_with_length(stream)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/databricks/spark/python/pyspark/serializers.py", line 196, in _read_with_length
raise SerializationError("Caused by " + traceback.format_exc())
pyspark.serializers.SerializationError: Caused by Traceback (most recent call last):
File "/databricks/spark/python/pyspark/serializers.py", line 192, in _read_with_length
return self.loads(obj)
^^^^^^^^^^^^^^^
File "/databricks/spark/python/pyspark/serializers.py", line 572, in loads
return cloudpickle.loads(obj, encoding=encoding)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ModuleNotFoundError: No module named 'dbruntime'
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:560)
at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:968)
at org.apache.spark.api.python.PythonRunner$$anon$3.read(PythonRunner.scala:953)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:516)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator.foreach(Iterator.scala:943)
at scala.collection.Iterator.foreach$(Iterator.scala:943)
at org.apache.spark.InterruptibleIterator.foreach(InterruptibleIterator.scala:28)
at scala.collection.generic.Growable.$plus$plus$eq(Growable.scala:62)
at scala.collection.generic.Growable.$plus$plus$eq$(Growable.scala:53)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:105)
at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:49)
at scala.collection.TraversableOnce.to(TraversableOnce.scala:366)
at scala.collection.TraversableOnce.to$(TraversableOnce.scala:364)
at org.apache.spark.InterruptibleIterator.to(InterruptibleIterator.scala:28)
at scala.collection.TraversableOnce.toBuffer(TraversableOnce.scala:358)
at scala.collection.TraversableOnce.toBuffer$(TraversableOnce.scala:358)
at org.apache.spark.InterruptibleIterator.toBuffer(InterruptibleIterator.scala:28)
at scala.collection.TraversableOnce.toArray(TraversableOnce.scala:345)
at scala.collection.TraversableOnce.toArray$(TraversableOnce.scala:339)
at org.apache.spark.InterruptibleIterator.toArray(InterruptibleIterator.scala:28)
at org.apache.spark.api.python.PythonRDD$.$anonfun$collectPartitions$1(PythonRDD.scala:234)
at org.apache.spark.SparkContext.$anonfun$runJob$2(SparkContext.scala:3208)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$3(ResultTask.scala:82)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$1(ResultTask.scala:82)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:225)
at org.apache.spark.scheduler.Task.doRunTask(Task.scala:199)
at org.apache.spark.scheduler.Task.$anonfun$run$5(Task.scala:161)
at com.databricks.unity.EmptyHandle$.runWithAndClose(UCSHandle.scala:134)
at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:155)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.scheduler.Task.run(Task.scala:102)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$10(Executor.scala:1043)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:111)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:1046)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:933)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Driver stacktrace:
at org.apache.spark.scheduler.DAGScheduler.$anonfun$failJobAndIndependentStages$1(DAGScheduler.scala:4043)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:4041)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:3954)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:3941)
at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:3941)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1767)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1750)
at scala.Option.foreach(Option.scala:407)
at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1750)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:4302)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:4204)
at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:4190)
at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:55)
at org.apache.spark.scheduler.DAGScheduler.$anonfun$runJob$1(DAGScheduler.scala:1413)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1401)
at org.apache.spark.SparkContext.runJobInternal(SparkContext.scala:3168)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:3149)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:3189)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:3208)
at org.apache.spark.api.python.PythonRDD$.collectPartitions(PythonRDD.scala:234)
at org.apache.spark.api.python.PythonRDD$.runJob(PythonRDD.scala:254)
at org.apache.spark.api.python.PythonRDD.runJob(PythonRDD.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:397)
at py4j.Gateway.invoke(Gateway.java:306)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:199)
at py4j.ClientServerConnection.run(ClientServerConnection.java:119)
at java.lang.Thread.run(Thread.java:750)
Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
[... same executor-side Python traceback and stack as above, ending in ModuleNotFoundError: No module named 'dbruntime' ...]
... 1 more
File <command-3609477148953765>, line 195
193 start = time.time()
194 start11 = time.time()
--> 195 print(common.readZipFromStorage(sc, dbutils, "2025", "04", "07", True).mapPartitions(processdata).count())
196 print(common.readZipFromStorage(sc, dbutils, "2025", "04", "08", False).mapPartitions(processdata).count())
197 print("End of main...")
File /local_disk0/spark-8459fa57-12da-4b04-9ebe-30bf615a5380/userFiles-d0b4be2b-cfc5-4eb0-9626-f647444f128c/Lib.zip/Lib/common/__init__.py:132, in readZipFromStorage(sc, dbutils, year, month, date, chk)
129 rdd = sc.parallelize(final_list_of_files)
131 # Create data frame
--> 132 df = rdd.toDF()
133 df = df.filter(df.name.endswith(".zip"))
135 # Extract only name column from df
File /databricks/spark/python/pyspark/sql/session.py:133, in _monkey_patch_RDD.<locals>.toDF(self, schema, sampleRatio)
99 def toDF(self, schema=None, sampleRatio=None):
100 """
101 Converts current :class:`RDD` into a :class:`DataFrame`
102
(...)
131 +---+
132 """
--> 133 return sparkSession.createDataFrame(self, schema, sampleRatio)
File /databricks/spark/python/pyspark/instrumentation_utils.py:47, in _wrap_function.<locals>.wrapper(*args, **kwargs)
45 start = time.perf_counter()
46 try:
---> 47 res = func(*args, **kwargs)
48 logger.log_success(
49 module_name, class_name, function_name, time.perf_counter() - start, signature
50 )
51 return res
File /databricks/spark/python/pyspark/sql/session.py:1610, in SparkSession.createDataFrame(self, data, schema, samplingRatio, verifySchema)
1605 if has_pandas and isinstance(data, pd.DataFrame):
1606 # Create a DataFrame from pandas DataFrame.
1607 return super(SparkSession, self).createDataFrame( # type: ignore[call-overload]
1608 data, schema, samplingRatio, verifySchema
1609 )
-> 1610 return self._create_dataframe(
1611 data, schema, samplingRatio, verifySchema # type: ignore[arg-type]
1612 )
File /databricks/spark/python/pyspark/sql/session.py:1665, in SparkSession._create_dataframe(self, data, schema, samplingRatio, verifySchema)
1663 else:
1664 if not is_remote_only() and isinstance(data, RDD):
-> 1665 rdd, struct = self._createFromRDD(data.map(prepare), schema, samplingRatio)
1666 else:
1667 rdd, struct = self._createFromLocal(map(prepare, data), schema)
File /databricks/spark/python/pyspark/sql/session.py:1168, in SparkSession._createFromRDD(self, rdd, schema, samplingRatio)
1164 """
1165 Create an RDD for DataFrame from an existing RDD, returns the RDD and schema.
1166 """
1167 if schema is None or isinstance(schema, (list, tuple)):
-> 1168 struct = self._inferSchema(rdd, samplingRatio, names=schema)
1169 converter = _create_converter(struct)
1170 tupled_rdd = rdd.map(converter)
File /databricks/spark/python/pyspark/sql/session.py:1108, in SparkSession._inferSchema(self, rdd, samplingRatio, names)
1087 def _inferSchema(
1088 self,
1089 rdd: "RDD[Any]",
1090 samplingRatio: Optional[float] = None,
1091 names: Optional[List[str]] = None,
1092 ) -> StructType:
1093 """
1094 Infer schema from an RDD of Row, dict, or tuple.
1095
(...)
1106 :class:`pyspark.sql.types.StructType`
1107 """
-> 1108 first = rdd.first()
1109 if isinstance(first, Sized) and len(first) == 0:
1110 raise ValueError("The first row in RDD is empty, can not infer schema")
File /databricks/spark/python/pyspark/instrumentation_utils.py:42, in _wrap_function.<locals>.wrapper(*args, **kwargs)
38 @functools.wraps(func)
39 def wrapper(*args: Any, **kwargs: Any) -> Any:
40 if hasattr(_local, "logging") and _local.logging:
41 # no need to log since this should be internal call.
---> 42 return func(*args, **kwargs)
43 _local.logging = True
44 try:
File /databricks/spark/python/pyspark/core/rdd.py:2796, in RDD.first(self)
2770 def first(self: "RDD[T]") -> T:
2771 """
2772 Return the first element in this RDD.
2773
(...)
2794 ValueError: RDD is empty
2795 """
-> 2796 rs = self.take(1)
2797 if rs:
2798 return rs[0]
File /databricks/spark/python/pyspark/instrumentation_utils.py:42, in _wrap_function.<locals>.wrapper(*args, **kwargs)
38 @functools.wraps(func)
39 def wrapper(*args: Any, **kwargs: Any) -> Any:
40 if hasattr(_local, "logging") and _local.logging:
41 # no need to log since this should be internal call.
---> 42 return func(*args, **kwargs)
43 _local.logging = True
44 try:
File /databricks/spark/python/pyspark/core/rdd.py:2763, in RDD.take(self, num)
2760 taken += 1
2762 p = range(partsScanned, min(partsScanned + numPartsToTry, totalParts))
-> 2763 res = self.context.runJob(self, takeUpToNumLeft, p)
2765 items += res
2766 partsScanned += numPartsToTry
File /databricks/spark/python/pyspark/core/context.py:2702, in SparkContext.runJob(self, rdd, partitionFunc, partitions, allowLocal)
2700 finally:
2701 os.remove(filename)
-> 2702 sock_info = self._jvm.PythonRDD.runJob(self._jsc.sc(), mappedRDD._jrdd, partitions)
2703 return list(_load_from_socket(sock_info, mappedRDD._jrdd_deserializer))
File /databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1355, in JavaMember.__call__(self, *args)
1349 command = proto.CALL_COMMAND_NAME +\
1350 self.command_header +\
1351 args_command +\
1352 proto.END_COMMAND_PART
1354 answer = self.gateway_client.send_command(command)
-> 1355 return_value = get_return_value(
1356 answer, self.gateway_client, self.target_id, self.name)
1358 for temp_arg in temp_args:
1359 if hasattr(temp_arg, "_detach"):
File /databricks/spark/python/pyspark/errors/exceptions/captured.py:255, in capture_sql_exception.<locals>.deco(*a, **kw)
252 from py4j.protocol import Py4JJavaError
254 try:
--> 255 return f(*a, **kw)
256 except Py4JJavaError as e:
257 converted = convert_exception(e.java_exception)
File /databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py:326, in get_return_value(answer, gateway_client, target_id, name)
324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
332 format(target_id, ".", name, value))
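From the traceback, the error is raised while an executor unpickles the elements of the RDD built inside readZipFromStorage: rdd = sc.parallelize(final_list_of_files) followed by rdd.toDF(), which triggers schema inference and therefore rdd.first() on a worker. Below is a minimal sketch of that failure mode, assuming final_list_of_files is built from dbutils.fs.ls results (the returned FileInfo objects are defined in the driver-only dbruntime module, which is my guess at why the worker cannot unpickle them):

# Minimal sketch of the failure mode (assumption: the real final_list_of_files comes
# from dbutils.fs.ls). Runs in a Databricks notebook, where sc, spark and dbutils exist.
files = dbutils.fs.ls("dbfs:/some/path")   # hypothetical path; returns FileInfo objects
rdd = sc.parallelize(files)                # pickles the FileInfo instances into the RDD

# toDF() infers the schema by running rdd.first() as a Spark job; the executor has to
# unpickle a FileInfo, tries to import its defining module, and fails with
# ModuleNotFoundError: No module named 'dbruntime'
df = rdd.toDF()

If that assumption holds, the listed objects would need to be converted to plain Python values (e.g. tuples of path/name/size) on the driver before parallelizing. I am including the cluster configuration below in case the environment itself is the cause.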
Here is my cluster configuration (DBR version 15.4 LTS):
{
"cluster_id": "0321-120122-h2mf81qg",
"creator_user_name": "rahul.790578@gmail.com",
"driver": {
"private_ip": "10.141.5.11",
"public_dns": "172.184.184.61",
"node_id": "9818cc182cfd4b40b2210d69cd531011",
"instance_id": "0c4d9ca030fd44a9b347acb8cef8d8f9",
"start_timestamp": 1744177619114,
"node_attributes": {
"is_spot": false
},
"host_private_ip": "10.141.4.10"
},
"executors": [
{
"private_ip": "10.141.5.12",
"public_dns": "13.93.231.102",
"node_id": "7f4fcc2b934e44199daa7f1d58c94e88",
"instance_id": "5e0a86855feb48528ef18e9c4e2cad83",
"start_timestamp": 1744177619187,
"node_attributes": {
"is_spot": true
},
"host_private_ip": "10.141.4.11"
},
{
"private_ip": "10.141.5.10",
"public_dns": "20.253.192.255",
"node_id": "b0f85ccd11b24d23919578216453a496",
"instance_id": "9b7b71b46ca946b8a87895e9cf7602b2",
"start_timestamp": 1744177619142,
"node_attributes": {
"is_spot": true
},
"host_private_ip": "10.141.4.12"
}
],
"spark_context_id": 791941095622325100,
"driver_healthy": true,
"jdbc_port": 10000,
"cluster_name": "Rahul Jaiswa's Cluster",
"spark_version": "15.4.x-scala2.12",
"spark_conf": {
"spark.sql.adaptive.coalescePartitions.initialPartitionNum": "8",
"spark.sql.files.maxPartitionBytes": "2469606",
"spark.sql.files.minPartitionNum": "8",
"spark.sql.adaptive.enabled": "true",
"spark.driver.maxResultSize": "5g",
"spark.databricks.delta.preview.enabled": "true",
"spark.sql.adaptive.advisoryPartitionSizeInBytes": "2469606",
"spark.driver.memory": "5g",
"spark.default.parallelism": "8",
"spark.sql.adaptive.coalescePartitions.enabled": "true",
"spark.driver.cores": "8",
"spark.executor.memory": "4g",
"spark.sql.files.openCostInBytes": "134217",
"spark.sql.adaptive.coalescePartitions.minPartitionNum": "8",
"spark.sql.shuffle.partitions": "8"
},
"azure_attributes": {
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK_AZURE",
"spot_bid_max_price": -1
},
"node_type_id": "Standard_E8s_v3",
"driver_node_type_id": "Standard_E8s_v3",
"cluster_log_conf": {
"dbfs": {
"destination": "dbfs:/FileStore/manual-logs"
}
},
"spark_env_vars": {
"PYSPARK_PYTHON": "/databricks/python3/bin/python3"
},
"autotermination_minutes": 60,
"enable_elastic_disk": true,
"disk_spec": {},
"cluster_source": "UI",
"init_scripts": [
{
"workspace": {
"destination": "/startup1/pyodbc-install.sh"
}
}
],
"single_user_name": "rahul.790578@gmail.com",
"enable_local_disk_encryption": false,
"instance_source": {
"node_type_id": "Standard_E8s_v3"
},
"driver_instance_source": {
"node_type_id": "Standard_E8s_v3"
},
"data_security_mode": "LEGACY_SINGLE_USER_STANDARD",
"runtime_engine": "PHOTON",
"effective_spark_version": "15.4.x-photon-scala2.12",
"assigned_principal": "user:rahul.790578@gmail.com",
"release_version": "15.4.12",
"state": "RUNNING",
"state_message": "",
"start_time": 1742558482247,
"last_state_loss_time": 1744177737748,
"last_activity_time": 1744177679642,
"last_restarted_time": 1744177737824,
"autoscale": {
"min_workers": 2,
"max_workers": 3,
"target_workers": 2
},
"cluster_memory_mb": 196608,
"cluster_cores": 24,
"default_tags": {
"Vendor": "Databricks",
"Creator": "rahul.790578@gmail.com",
"ClusterName": "Rahul Jaiswa's Cluster",
"ClusterId": "0321-120122-h2mf81qg",
"Environmet": "dvt"
},
"cluster_log_status": {
"last_attempted": 1744178955397
},
"init_scripts_safe_mode": false,
"spec": {
"cluster_name": "Rahul Jaiswa's Cluster",
"spark_version": "15.4.x-scala2.12",
"spark_conf": {
"spark.databricks.delta.preview.enabled": "true",
"spark.sql.files.minPartitionNum": "8",
"spark.sql.files.openCostInBytes": "134217",
"spark.sql.adaptive.coalescePartitions.initialPartitionNum": "8",
"spark.sql.adaptive.coalescePartitions.enabled": "true",
"spark.executor.memory": "4g",
"spark.driver.maxResultSize": "5g",
"spark.sql.shuffle.partitions": "8",
"spark.driver.memory": "5g",
"spark.sql.adaptive.coalescePartitions.minPartitionNum": "8",
"spark.sql.adaptive.enabled": "true",
"spark.sql.adaptive.advisoryPartitionSizeInBytes": "2469606",
"spark.sql.files.maxPartitionBytes": "2469606",
"spark.default.parallelism": "8",
"spark.driver.cores": "8"
},
"azure_attributes": {
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK_AZURE",
"spot_bid_max_price": -1
},
"node_type_id": "Standard_E8s_v3",
"driver_node_type_id": "Standard_E8s_v3",
"cluster_log_conf": {
"dbfs": {
"destination": "dbfs:/FileStore/manual-logs"
}
},
"spark_env_vars": {
"PYSPARK_PYTHON": "/databricks/python3/bin/python3"
},
"autotermination_minutes": 60,
"enable_elastic_disk": true,
"init_scripts": [
{
"workspace": {
"destination": "/startup1/pyodbc-install.sh"
}
}
],
"single_user_name": "rahul.790578@gmail.com",
"enable_local_disk_encryption": false,
"data_security_mode": "LEGACY_SINGLE_USER_STANDARD",
"runtime_engine": "PHOTON",
"assigned_principal": "user:rrahul.790578@gmail.com",
"autoscale": {
"min_workers": 2,
"max_workers": 3
},
"apply_policy_default_values": false
}
}
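For completeness, a small sanity-check sketch I can run from the same notebook (assuming the default sc and spark handles) to confirm that the executors use the PYSPARK_PYTHON interpreter set in spark_env_vars and that the tuned spark_conf values above are actually in effect:

import sys

# Compare the driver interpreter with the interpreter the executors actually run
print("driver python:", sys.executable)
worker_python = (
    sc.parallelize([0], 1)
      .map(lambda _: __import__("sys").executable)
      .collect()
)
print("worker python:", worker_python)

# Spot-check a couple of the spark_conf values from the cluster spec
for key in ("spark.sql.shuffle.partitions", "spark.sql.files.maxPartitionBytes"):
    print(key, "=", spark.conf.get(key))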