<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Failures Streaming data to Pulsar in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66082#M33011</link>
    <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/103461"&gt;@surband&lt;/a&gt;&amp;nbsp;- The feature is in public preview within DBR. Only reads from Pulsar sources are supported. We shall follow up with the engineering team for write support to Pulsar.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;A href="https://docs.databricks.com/en/connect/streaming/pulsar.html#stream-from-apache-pulsar" target="_blank"&gt;https://docs.databricks.com/en/connect/streaming/pulsar.html#stream-from-apache-pulsar&lt;/A&gt;&lt;/P&gt;</description>
    <pubDate>Thu, 11 Apr 2024 16:27:06 GMT</pubDate>
    <dc:creator>shan_chandra</dc:creator>
    <dc:date>2024-04-11T16:27:06Z</dc:date>
    <item>
      <title>Failures Streaming data to Pulsar</title>
      <link>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66032#M32992</link>
      <description>&lt;P&gt;I am encountering the following exception when attempting to stream data to a pulsar topic. This is a first time implementation - any ideas to resolve this is greatly appreciated.&lt;/P&gt;&lt;P&gt;DBR:&amp;nbsp;&lt;SPAN&gt;14.3 LTS ML (includes Apache Spark 3.5.0, Scala 2.12)&lt;/SPAN&gt;&lt;/P&gt;&lt;DIV class=""&gt;&lt;SPAN class=""&gt;1 Driver&lt;/SPAN&gt;&lt;SPAN class=""&gt;64&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;GB Memory,&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;16&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;Cores&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV class=""&gt;&lt;SPAN class=""&gt;Runtime&lt;/SPAN&gt;&lt;SPAN class=""&gt;14.3.x-cpu-ml-scala2.12&lt;/SPAN&gt;&lt;/DIV&gt;&lt;P&gt;&lt;U&gt;Exception:&lt;/U&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT size="2"&gt;Caused by: java.lang.NoSuchMethodError: org.apache.spark.sql.types.StructType.toAttributes()Lscala/collection/Seq;&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT size="2"&gt;at org.apache.spark.sql.pulsar.PulsarSink.addBatch(PulsarSinks.scala:47)&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT size="2"&gt;at org.apache.spark.sql.execution.streaming.MicroBatchExecution.addBatch(MicroBatchExecution.scala:1236)&lt;/FONT&gt;&lt;BR /&gt;&lt;FONT size="2"&gt;at org.apache.spark.sql.execution.streaming.MicroBatchExecution.$anonfun$runBatch$19(MicroBatchExecution.scala:1465)&lt;/FONT&gt;&lt;BR /&gt;&lt;U&gt;Code&lt;/U&gt;:&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;SPAN&gt;val&lt;/SPAN&gt; &lt;SPAN&gt;query&lt;/SPAN&gt; &lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; sourceDF&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;SPAN&gt;&amp;nbsp; .select( to_json( struct(col(&lt;/SPAN&gt;&lt;SPAN&gt;"*"&lt;/SPAN&gt;&lt;SPAN&gt;)) ) .alias(&lt;/SPAN&gt;&lt;SPAN&gt;"value"&lt;/SPAN&gt;&lt;SPAN&gt;) )&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;SPAN&gt;&amp;nbsp; .writeStream&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;SPAN&gt;&amp;nbsp; 
.format(&lt;/SPAN&gt;&lt;SPAN&gt;"pulsar"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;SPAN&gt;&amp;nbsp; .option(&lt;/SPAN&gt;&lt;SPAN&gt;"service.url"&lt;/SPAN&gt;&lt;SPAN&gt;, pulsarServiceUrl)&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;SPAN&gt;&amp;nbsp; .option(&lt;/SPAN&gt;&lt;SPAN&gt;"topic"&lt;/SPAN&gt;&lt;SPAN&gt;, pulsarTopic)&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;SPAN&gt;&amp;nbsp; .option(&lt;/SPAN&gt;&lt;SPAN&gt;"checkpointLocation"&lt;/SPAN&gt;&lt;SPAN&gt;, checkpointLocation)&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;SPAN&gt;&amp;nbsp; .trigger(&lt;/SPAN&gt;&lt;SPAN&gt;Trigger&lt;/SPAN&gt;&lt;SPAN&gt;.&lt;/SPAN&gt;&lt;SPAN&gt;ProcessingTime&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"10 seconds"&lt;/SPAN&gt;&lt;SPAN&gt;))&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;SPAN&gt;&amp;nbsp; .start()&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Wed, 10 Apr 2024 18:57:02 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66032#M32992</guid>
      <dc:creator>surband</dc:creator>
      <dc:date>2024-04-10T18:57:02Z</dc:date>
    </item>
    <item>
      <title>Re: Failures Streaming data to Pulsar</title>
      <link>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66033#M32993</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/103461"&gt;@surband&lt;/a&gt;&amp;nbsp; - can you please share the full error stack trace? Also, please use a compatible DBR (Spark) version instead of the ML runtime. Please refer to the document below and validate that you have the necessary connector libraries added to the cluster.&lt;/P&gt;
&lt;P&gt;&lt;A href="https://docs.streamnative.io/hub/data-processing-pulsar-spark-3.2" target="_blank"&gt;https://docs.streamnative.io/hub/data-processing-pulsar-spark-3.2&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 10 Apr 2024 19:12:00 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66033#M32993</guid>
      <dc:creator>shan_chandra</dc:creator>
      <dc:date>2024-04-10T19:12:00Z</dc:date>
    </item>
    <item>
      <title>Re: Failures Streaming data to Pulsar</title>
      <link>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66035#M32994</link>
      <description>&lt;P&gt;Please see the attached log files and screenshot of DBR. The one I selected for Runtime is one of the options in the dropdown. I can't tell from DBR which version of StreamNative is used underneath.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 10 Apr 2024 19:36:55 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66035#M32994</guid>
      <dc:creator>surband</dc:creator>
      <dc:date>2024-04-10T19:36:55Z</dc:date>
    </item>
    <item>
      <title>Re: Failures Streaming data to Pulsar</title>
      <link>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66036#M32995</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/103461"&gt;@surband&lt;/a&gt;&amp;nbsp; - The Databricks Runtime version has a dropdown when you edit the cluster. There will be two options - Standard and ML.&amp;nbsp; Could you please let us know if you have the &lt;A href="https://docs.streamnative.io/hub/data-processing-pulsar-spark-3.2" target="_self"&gt;Spark Pulsar connector&lt;/A&gt; added to the cluster libraries?&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Per the documentation &lt;A href="https://docs.databricks.com/en/connect/streaming/pulsar.html" target="_blank"&gt;here&lt;/A&gt;,&amp;nbsp;Structured Streaming provides exactly-once processing semantics for data read from Pulsar sources.&lt;/P&gt;
&lt;DIV id="syntax-example" class="section"&gt;&amp;nbsp;&lt;/DIV&gt;</description>
      <pubDate>Wed, 10 Apr 2024 20:00:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66036#M32995</guid>
      <dc:creator>shan_chandra</dc:creator>
      <dc:date>2024-04-10T20:00:58Z</dc:date>
    </item>
    <item>
      <title>Re: Failures Streaming data to Pulsar</title>
      <link>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66043#M32998</link>
      <description>&lt;P&gt;Hello Shan_chandra, I tried with Standard DBR as suggested but got the same result. The Spark Pulsar Connector, as I understand it, comes preinstalled in DBR. I have not explicitly installed anything - I did not see any documentation to do the same. The attached image "streamnative-pulsar.png" is a screenshot of the Environments tab - that shows it's available on the classpath.&lt;/P&gt;</description>
      <pubDate>Wed, 10 Apr 2024 20:26:49 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66043#M32998</guid>
      <dc:creator>surband</dc:creator>
      <dc:date>2024-04-10T20:26:49Z</dc:date>
    </item>
    <item>
      <title>Re: Failures Streaming data to Pulsar</title>
      <link>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66077#M33009</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/616"&gt;@shan_chandra&lt;/a&gt;&amp;nbsp;any suggestions ?&lt;/P&gt;</description>
      <pubDate>Thu, 11 Apr 2024 15:48:13 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66077#M33009</guid>
      <dc:creator>surband</dc:creator>
      <dc:date>2024-04-11T15:48:13Z</dc:date>
    </item>
    <item>
      <title>Re: Failures Streaming data to Pulsar</title>
      <link>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66081#M33010</link>
      <description>&lt;P&gt;Logs attached&lt;/P&gt;</description>
      <pubDate>Thu, 11 Apr 2024 16:24:13 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66081#M33010</guid>
      <dc:creator>surband</dc:creator>
      <dc:date>2024-04-11T16:24:13Z</dc:date>
    </item>
    <item>
      <title>Re: Failures Streaming data to Pulsar</title>
      <link>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66082#M33011</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/103461"&gt;@surband&lt;/a&gt;&amp;nbsp;- The feature is in public preview within DBR. Only reads from Pulsar sources are supported. We shall follow up with the engineering team for write support to Pulsar.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;A href="https://docs.databricks.com/en/connect/streaming/pulsar.html#stream-from-apache-pulsar" target="_blank"&gt;https://docs.databricks.com/en/connect/streaming/pulsar.html#stream-from-apache-pulsar&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 11 Apr 2024 16:27:06 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/failures-streaming-data-to-pulsar/m-p/66082#M33011</guid>
      <dc:creator>shan_chandra</dc:creator>
      <dc:date>2024-04-11T16:27:06Z</dc:date>
    </item>
  </channel>
</rss>

