<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: MongoDB Spark Connector v10.x read error on Databricks 13.x in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/mongodb-spark-connector-v10-x-read-error-on-databricks-13-x/m-p/45342#M27855</link>
    <description>&lt;P&gt;The problem was fixed in Databricks Runtime 13.3 LTS.&lt;/P&gt;</description>
    <pubDate>Tue, 19 Sep 2023 15:02:48 GMT</pubDate>
    <dc:creator>silvadev</dc:creator>
    <dc:date>2023-09-19T15:02:48Z</dc:date>
    <item>
      <title>MongoDB Spark Connector v10.x read error on Databricks 13.x</title>
      <link>https://community.databricks.com/t5/data-engineering/mongodb-spark-connector-v10-x-read-error-on-databricks-13-x/m-p/38530#M26663</link>
      <description>&lt;P&gt;I am facing an error when I am trying to read data from any MongoDB collection using MongoDB Spark Connector v10.x on Databricks v13.x.&lt;/P&gt;&lt;P&gt;The below error appears to start at line &lt;A href="https://github.com/mongodb/mongo-spark/blob/febba6ec15b75724a274eee1e65b82d666a46b66/src/main/java/com/mongodb/spark/sql/connector/schema/InferSchema.java#L103" target="_self"&gt;#103&lt;/A&gt;&amp;nbsp;of MongoDB Spark Connector Library (v10.2.0):&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;java.lang.NoSuchMethodError: org.apache.spark.sql.types.DataType.sameType(Lorg/apache/spark/sql/types/DataType;)Z
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
File &amp;lt;command-3492412077247672&amp;gt;:6
      1 mongo_opts = {'connection.uri': conf.mongodb.read_uri, 
      2               'database': 'setorizacao',
      3               'collection': 'outlet',
      4               'outputExtendedJson': 'true'}
----&amp;gt; 6 mongo_outl = spark.read.load(format='mongodb', **mongo_opts)

File /databricks/spark/python/pyspark/instrumentation_utils.py:48, in _wrap_function.&amp;lt;locals&amp;gt;.wrapper(*args, **kwargs)
     46 start = time.perf_counter()
     47 try:
---&amp;gt; 48     res = func(*args, **kwargs)
     49     logger.log_success(
     50         module_name, class_name, function_name, time.perf_counter() - start, signature
     51     )
     52     return res

File /databricks/spark/python/pyspark/sql/readwriter.py:314, in DataFrameReader.load(self, path, format, schema, **options)
    312     return self._df(self._jreader.load(self._spark._sc._jvm.PythonUtils.toSeq(path)))
    313 else:
--&amp;gt; 314     return self._df(self._jreader.load())

File /databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1322, in JavaMember.__call__(self, *args)
   1316 command = proto.CALL_COMMAND_NAME +\
   1317     self.command_header +\
   1318     args_command +\
   1319     proto.END_COMMAND_PART
   1321 answer = self.gateway_client.send_command(command)
-&amp;gt; 1322 return_value = get_return_value(
   1323     answer, self.gateway_client, self.target_id, self.name)
   1325 for temp_arg in temp_args:
   1326     if hasattr(temp_arg, "_detach"):

File /databricks/spark/python/pyspark/errors/exceptions/captured.py:188, in capture_sql_exception.&amp;lt;locals&amp;gt;.deco(*a, **kw)
    186 def deco(*a: Any, **kw: Any) -&amp;gt; Any:
    187     try:
--&amp;gt; 188         return f(*a, **kw)
    189     except Py4JJavaError as e:
    190         converted = convert_exception(e.java_exception)

File /databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py:326, in get_return_value(answer, gateway_client, target_id, name)
    324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
    325 if answer[1] == REFERENCE_TYPE:
--&amp;gt; 326     raise Py4JJavaError(
    327         "An error occurred while calling {0}{1}{2}.\n".
    328         format(target_id, ".", name), value)
    329 else:
    330     raise Py4JError(
    331         "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
    332         format(target_id, ".", name, value))

Py4JJavaError: An error occurred while calling o1020.load.
: java.lang.NoSuchMethodError: org.apache.spark.sql.types.DataType.sameType(Lorg/apache/spark/sql/types/DataType;)Z
	at com.mongodb.spark.sql.connector.schema.InferSchema.lambda$inferSchema$4(InferSchema.java:103)
	at java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:193)
	at java.util.Spliterators$ArraySpliterator.forEachRemaining(Spliterators.java:948)
	at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)
	at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
	at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
	at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
	at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:566)
	at com.mongodb.spark.sql.connector.schema.InferSchema.inferSchema(InferSchema.java:112)
	at com.mongodb.spark.sql.connector.schema.InferSchema.inferSchema(InferSchema.java:78)
	at com.mongodb.spark.sql.connector.MongoTableProvider.inferSchema(MongoTableProvider.java:60)
	at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils$.getTableFromProvider(DataSourceV2Utils.scala:91)
	at org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils$.loadV2Source(DataSourceV2Utils.scala:138)
	at org.apache.spark.sql.DataFrameReader.$anonfun$load$1(DataFrameReader.scala:333)
	at scala.Option.flatMap(Option.scala:271)
	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:331)
	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:226)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:397)
	at py4j.Gateway.invoke(Gateway.java:306)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:195)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:115)
	at java.lang.Thread.run(Thread.java:750)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have tested all versions of Spark Connector from 10.1.0 to 10.2.0. I have also tested all versions of Databricks 13, from 13.0 to 13.2.&amp;nbsp;I have tested in versions 5 and 6 of MongoDB server (Atlas).&lt;/P&gt;&lt;P&gt;For now I am using the library with the Maven Repository coordinates&amp;nbsp;&lt;STRONG&gt;org.mongodb.spark:mongo-spark-connector_2.12:10.2.0&lt;/STRONG&gt;, but previously I have also used the official jar file available in &lt;A href="https://repo1.maven.org/maven2/org/mongodb/spark/mongo-spark-connector_2.12/10.2.0/mongo-spark-connector_2.12-10.2.0-all.jar" target="_self"&gt;this link&lt;/A&gt;.&lt;/P&gt;&lt;P&gt;Using the version&amp;nbsp;3.0.2 of Spark Connector works well for reading and writing operations. Write operations also work fine in versions 10.x of Spark Connector.&amp;nbsp;&lt;/P&gt;&lt;P&gt;I have tried to read data of the same collections of MongoDB in a local setup of Spark and this worked normally. For this, I have used the version Spark 3.4.1, Java 11.0.19 (Azul Zulu) and Python 3.10.6 (for PySpark).&lt;/P&gt;&lt;P&gt;The error does not occur in Databricks 12.2 and below.&lt;/P&gt;&lt;H2&gt;Configuration to reproduce error:&lt;/H2&gt;&lt;UL&gt;&lt;LI&gt;&lt;STRONG&gt;Databricks&lt;/STRONG&gt;: 13.2 (&lt;SPAN&gt;Apache Spark 3.4.0, Scala 2.12, Python 3.10.6&lt;/SPAN&gt;)&lt;/LI&gt;&lt;LI&gt;&lt;STRONG&gt;MongoDB Spark Connector&lt;/STRONG&gt;: 10.2.0 (Scala 2.12)&lt;BR /&gt;Maven Coordinates:&amp;nbsp;&lt;STRONG&gt;org.mongodb.spark:mongo-spark-connector_2.12:10.2.0&lt;/STRONG&gt;&lt;/LI&gt;&lt;LI&gt;&lt;STRONG&gt;MongoDB&lt;/STRONG&gt;: Atlas free tier (version 6).&lt;/LI&gt;&lt;/UL&gt;</description>
      <pubDate>Thu, 27 Jul 2023 12:49:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/mongodb-spark-connector-v10-x-read-error-on-databricks-13-x/m-p/38530#M26663</guid>
      <dc:creator>silvadev</dc:creator>
      <dc:date>2023-07-27T12:49:48Z</dc:date>
    </item>
    <item>
      <title>Re: MongoDB Spark Connector v10.x read error on Databricks 13.x</title>
      <link>https://community.databricks.com/t5/data-engineering/mongodb-spark-connector-v10-x-read-error-on-databricks-13-x/m-p/45342#M27855</link>
      <description>&lt;P&gt;The problem was fixed in Databricks Runtime 13.3 LTS.&lt;/P&gt;</description>
      <pubDate>Tue, 19 Sep 2023 15:02:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/mongodb-spark-connector-v10-x-read-error-on-databricks-13-x/m-p/45342#M27855</guid>
      <dc:creator>silvadev</dc:creator>
      <dc:date>2023-09-19T15:02:48Z</dc:date>
    </item>
  </channel>
</rss>

