<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic StreamCorruptedException, databricks-connect 9.1 in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/streamcorruptedexception-databricks-connect-9-1/m-p/7050#M3033</link>
    <description>&lt;P&gt;Last week, around the 21st of March, we started having issues with databricks-connect (DBR 9.1 LTS). &lt;I&gt;"databricks-connect test"&lt;/I&gt; works, but the following code snippet:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE&gt;from pyspark.sql import SparkSession
&amp;nbsp;
&amp;nbsp;
spark = SparkSession.builder.getOrCreate()
sparkcontext = spark.sparkContext
&amp;nbsp;
rdd1 = sparkcontext.parallelize(range(10), 10)
rdd2 = rdd1.map(lambda x: x + 1)
sum2 = rdd2.sum()
&amp;nbsp;
assert sum2 == 55&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;fails with error:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE&gt;log4j:WARN No appenders could be found for logger (org.apache.spark.util.Utils).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See &lt;A href="http://logging.apache.org/log4j/1.2/faq.html#noconfig" target="test_blank"&gt;http://logging.apache.org/log4j/1.2/faq.html#noconfig&lt;/A&gt; for more info.
View job details at https://[redacted].azuredatabricks.net/?o=[redacted]#/setting/clusters/[redacted]/sparkUi
Traceback (most recent call last):
  File "main.py", line 9, in &amp;lt;module&amp;gt;
    sum2 = rdd2.sum()
  File "/home/[redacted]/dev/debug-databricks-connect/.venv/lib/python3.8/site-packages/pyspark/rdd.py", line 1259, in sum
    return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add)
  File "/home/[redacted]/dev/debug-databricks-connect/.venv/lib/python3.8/site-packages/pyspark/rdd.py", line 1113, in fold
    vals = self.mapPartitions(func).collect()
  File "/home/[redacted]/dev/debug-databricks-connect/.venv/lib/python3.8/site-packages/pyspark/rdd.py", line 967, in collect
    sock_info = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
  File "/home/[redacted]/dev/debug-databricks-connect/.venv/lib/python3.8/site-packages/py4j/java_gateway.py", line 1304, in __call__
    return_value = get_return_value(
  File "/home/[redacted]/dev/debug-databricks-connect/.venv/lib/python3.8/site-packages/pyspark/sql/utils.py", line 117, in deco
    return f(*a, **kw)
  File "/home/[redacted]/dev/debug-databricks-connect/.venv/lib/python3.8/site-packages/py4j/protocol.py", line 326, in get_return_value
    raise Py4JJavaError(
py4j.protocol.Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: java.io.StreamCorruptedException: invalid type code: 01
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1700)
        at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
        at java.io.ObjectInputStream.readArray(ObjectInputStream.java:2119)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1657)
        at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
        at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
        at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
        at org.apache.spark.sql.util.ProtoSerializer.$anonfun$deserializeObject$1(ProtoSerializer.scala:6631)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
        at org.apache.spark.sql.util.ProtoSerializer.deserializeObject(ProtoSerializer.scala:6616)
        at com.databricks.service.SparkServiceRPCHandler.execute0(SparkServiceRPCHandler.scala:728)
        at com.databricks.service.SparkServiceRPCHandler.$anonfun$executeRPC0$1(SparkServiceRPCHandler.scala:477)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
        at com.databricks.service.SparkServiceRPCHandler.executeRPC0(SparkServiceRPCHandler.scala:372)
        at com.databricks.service.SparkServiceRPCHandler$$anon$2.call(SparkServiceRPCHandler.scala:323)
        at com.databricks.service.SparkServiceRPCHandler$$anon$2.call(SparkServiceRPCHandler.scala:309)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at com.databricks.service.SparkServiceRPCHandler.$anonfun$executeRPC$1(SparkServiceRPCHandler.scala:359)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
        at com.databricks.service.SparkServiceRPCHandler.executeRPC(SparkServiceRPCHandler.scala:336)
        at com.databricks.service.SparkServiceRPCServlet.doPost(SparkServiceRPCServer.scala:167)
        at javax.servlet.http.HttpServlet.service(HttpServlet.java:707)
        at javax.servlet.http.HttpServlet.service(HttpServlet.java:790)
        at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:799)
        at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:550)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:190)
        at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:501)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)
        at org.eclipse.jetty.server.Server.handle(Server.java:516)
        at org.eclipse.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:388)
        at org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:633)
        at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:380)
        at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:277)
        at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311)
        at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:105)
        at org.eclipse.jetty.io.ChannelEndPoint$1.run(ChannelEndPoint.java:104)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:338)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:315)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
        at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:383)
        at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:882)
        at org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1036)
        at java.lang.Thread.run(Thread.java:750)&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;the python environment:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE&gt;[tool.poetry]
name = "dev"
version = "0.1.0"
description = ""
authors = ["John Doe"]
&amp;nbsp;
&amp;nbsp;
[tool.poetry.dependencies]
python = "^3.8"
databricks-connect = "&amp;gt;=9.1.0,&amp;lt;9.2.0"
&amp;nbsp;
&amp;nbsp;
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;As nothing changed on our side I would appreciate any help.&lt;/P&gt;</description>
    <pubDate>Fri, 21 Mar 2025 13:17:57 GMT</pubDate>
    <dc:creator>JordiDekker</dc:creator>
    <dc:date>2025-03-21T13:17:57Z</dc:date>
    <item>
      <title>StreamCorruptedException, databricks-connect 9.1</title>
      <link>https://community.databricks.com/t5/data-engineering/streamcorruptedexception-databricks-connect-9-1/m-p/7050#M3033</link>
      <description>&lt;P&gt;Last week, around the 21st of March, we started having issues with databricks-connect (DBR 9.1 LTS). &lt;I&gt;"databricks-connect test"&lt;/I&gt; works, but the following code snippet:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE&gt;from pyspark.sql import SparkSession
&amp;nbsp;
&amp;nbsp;
spark = SparkSession.builder.getOrCreate()
sparkcontext = spark.sparkContext
&amp;nbsp;
rdd1 = sparkcontext.parallelize(range(10), 10)
rdd2 = rdd1.map(lambda x: x + 1)
sum2 = rdd2.sum()
&amp;nbsp;
assert sum2 == 55&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;fails with error:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE&gt;log4j:WARN No appenders could be found for logger (org.apache.spark.util.Utils).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See &lt;A href="http://logging.apache.org/log4j/1.2/faq.html#noconfig" target="test_blank"&gt;http://logging.apache.org/log4j/1.2/faq.html#noconfig&lt;/A&gt; for more info.
View job details at https://[redacted].azuredatabricks.net/?o=[redacted]#/setting/clusters/[redacted]/sparkUi
Traceback (most recent call last):
  File "main.py", line 9, in &amp;lt;module&amp;gt;
    sum2 = rdd2.sum()
  File "/home/[redacted]/dev/debug-databricks-connect/.venv/lib/python3.8/site-packages/pyspark/rdd.py", line 1259, in sum
    return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add)
  File "/home/[redacted]/dev/debug-databricks-connect/.venv/lib/python3.8/site-packages/pyspark/rdd.py", line 1113, in fold
    vals = self.mapPartitions(func).collect()
  File "/home/[redacted]/dev/debug-databricks-connect/.venv/lib/python3.8/site-packages/pyspark/rdd.py", line 967, in collect
    sock_info = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
  File "/home/[redacted]/dev/debug-databricks-connect/.venv/lib/python3.8/site-packages/py4j/java_gateway.py", line 1304, in __call__
    return_value = get_return_value(
  File "/home/[redacted]/dev/debug-databricks-connect/.venv/lib/python3.8/site-packages/pyspark/sql/utils.py", line 117, in deco
    return f(*a, **kw)
  File "/home/[redacted]/dev/debug-databricks-connect/.venv/lib/python3.8/site-packages/py4j/protocol.py", line 326, in get_return_value
    raise Py4JJavaError(
py4j.protocol.Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: java.io.StreamCorruptedException: invalid type code: 01
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1700)
        at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
        at java.io.ObjectInputStream.readArray(ObjectInputStream.java:2119)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1657)
        at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2431)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2355)
        at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2213)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1669)
        at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
        at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
        at org.apache.spark.sql.util.ProtoSerializer.$anonfun$deserializeObject$1(ProtoSerializer.scala:6631)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
        at org.apache.spark.sql.util.ProtoSerializer.deserializeObject(ProtoSerializer.scala:6616)
        at com.databricks.service.SparkServiceRPCHandler.execute0(SparkServiceRPCHandler.scala:728)
        at com.databricks.service.SparkServiceRPCHandler.$anonfun$executeRPC0$1(SparkServiceRPCHandler.scala:477)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
        at com.databricks.service.SparkServiceRPCHandler.executeRPC0(SparkServiceRPCHandler.scala:372)
        at com.databricks.service.SparkServiceRPCHandler$$anon$2.call(SparkServiceRPCHandler.scala:323)
        at com.databricks.service.SparkServiceRPCHandler$$anon$2.call(SparkServiceRPCHandler.scala:309)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at com.databricks.service.SparkServiceRPCHandler.$anonfun$executeRPC$1(SparkServiceRPCHandler.scala:359)
        at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
        at com.databricks.service.SparkServiceRPCHandler.executeRPC(SparkServiceRPCHandler.scala:336)
        at com.databricks.service.SparkServiceRPCServlet.doPost(SparkServiceRPCServer.scala:167)
        at javax.servlet.http.HttpServlet.service(HttpServlet.java:707)
        at javax.servlet.http.HttpServlet.service(HttpServlet.java:790)
        at org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:799)
        at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:550)
        at org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:190)
        at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:501)
        at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
        at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127)
        at org.eclipse.jetty.server.Server.handle(Server.java:516)
        at org.eclipse.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:388)
        at org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:633)
        at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:380)
        at org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:277)
        at org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311)
        at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:105)
        at org.eclipse.jetty.io.ChannelEndPoint$1.run(ChannelEndPoint.java:104)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:338)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:315)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173)
        at org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
        at org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:383)
        at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:882)
        at org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1036)
        at java.lang.Thread.run(Thread.java:750)&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;the python environment:&lt;/P&gt;
&lt;PRE&gt;&lt;CODE&gt;[tool.poetry]
name = "dev"
version = "0.1.0"
description = ""
authors = ["John Doe"]
&amp;nbsp;
&amp;nbsp;
[tool.poetry.dependencies]
python = "^3.8"
databricks-connect = "&amp;gt;=9.1.0,&amp;lt;9.2.0"
&amp;nbsp;
&amp;nbsp;
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;As nothing changed on our side I would appreciate any help.&lt;/P&gt;</description>
      <pubDate>Fri, 21 Mar 2025 13:17:57 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/streamcorruptedexception-databricks-connect-9-1/m-p/7050#M3033</guid>
      <dc:creator>JordiDekker</dc:creator>
      <dc:date>2025-03-21T13:17:57Z</dc:date>
    </item>
    <item>
      <title>Re: StreamCorruptedException, databricks-connect 9.1</title>
      <link>https://community.databricks.com/t5/data-engineering/streamcorruptedexception-databricks-connect-9-1/m-p/7051#M3034</link>
      <description>&lt;P&gt;Update: the snippet runs fine on an 11.3 LTS cluster (and databricks-connect 11.3.* of course).&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Is this a regression in 9.1? Could it have something to do with the log4j -&amp;gt; reload4j replacement in DBR &amp;lt;= 10.4?&lt;/P&gt;</description>
      <pubDate>Mon, 27 Mar 2023 09:42:38 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/streamcorruptedexception-databricks-connect-9-1/m-p/7051#M3034</guid>
      <dc:creator>JordiDekker</dc:creator>
      <dc:date>2023-03-27T09:42:38Z</dc:date>
    </item>
    <item>
      <title>Re: StreamCorruptedException, databricks-connect 9.1</title>
      <link>https://community.databricks.com/t5/data-engineering/streamcorruptedexception-databricks-connect-9-1/m-p/7052#M3035</link>
      <description>&lt;P&gt;An error occurred while calling o390.createRDDFromTrustedPath. Trace: py4j.security.Py4JSecurityException: Method public org.apache.spark.api.java.JavaRDD org.apache.spark.sql.SparkSession.createRDDFromTrustedPath(java.lang.String,int) is not whitelisted on class class org.apache.spark.sql.SparkSessi&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I am getting the above error. How are you running this?&lt;/P&gt;</description>
      <pubDate>Mon, 27 Mar 2023 11:57:31 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/streamcorruptedexception-databricks-connect-9-1/m-p/7052#M3035</guid>
      <dc:creator>Aviral-Bhardwaj</dc:creator>
      <dc:date>2023-03-27T11:57:31Z</dc:date>
    </item>
    <item>
      <title>Re: StreamCorruptedException, databricks-connect 9.1</title>
      <link>https://community.databricks.com/t5/data-engineering/streamcorruptedexception-databricks-connect-9-1/m-p/7053#M3036</link>
      <description>&lt;P&gt;In this case locally, in a clean poetry environment.&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 09:03:03 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/streamcorruptedexception-databricks-connect-9-1/m-p/7053#M3036</guid>
      <dc:creator>JordiDekker</dc:creator>
      <dc:date>2023-03-28T09:03:03Z</dc:date>
    </item>
    <item>
      <title>Re: StreamCorruptedException, databricks-connect 9.1</title>
      <link>https://community.databricks.com/t5/data-engineering/streamcorruptedexception-databricks-connect-9-1/m-p/7054#M3037</link>
      <description>&lt;P&gt;Hi @Jordi Dekker​&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thank you for posting your question in our community! We are happy to assist you.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;To help us provide you with the most accurate information, could you please take a moment to review the responses and select the one that best answers your question?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;This will also help other community members who may have similar questions in the future. Thank you for your participation and let us know if you need any further assistance!&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 30 Mar 2023 07:50:33 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/streamcorruptedexception-databricks-connect-9-1/m-p/7054#M3037</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-03-30T07:50:33Z</dc:date>
    </item>
    <item>
      <title>Re: StreamCorruptedException, databricks-connect 9.1</title>
      <link>https://community.databricks.com/t5/data-engineering/streamcorruptedexception-databricks-connect-9-1/m-p/7055#M3038</link>
      <description>&lt;P&gt;Hi @Vidula Khanna​,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;No answer has been given yet&lt;/P&gt;</description>
      <pubDate>Fri, 31 Mar 2023 07:29:25 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/streamcorruptedexception-databricks-connect-9-1/m-p/7055#M3038</guid>
      <dc:creator>JordiDekker</dc:creator>
      <dc:date>2023-03-31T07:29:25Z</dc:date>
    </item>
  </channel>
</rss>

