<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Connecting confluent to databricks. in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/connecting-confluent-to-databricks/m-p/36927#M26222</link>
    <description>&lt;P&gt;You might want to watch this as well&amp;nbsp;&lt;A href="https://www.confluent.io/resources/online-talk/innovate-faster-and-easier-with-confluent-and-databricks-on-azure/?utm_medium=sem&amp;amp;utm_source=google&amp;amp;utm_campaign=ch.sem_br.nonbrand_tp.prs_tgt.dsa_mt.dsa_rgn.india_lng.eng_dv.all_con.online-talks&amp;amp;utm_term=&amp;amp;creative=&amp;amp;device=c&amp;amp;placement=&amp;amp;gad=1&amp;amp;gclid=Cj0KCQjwho-lBhC_ARIsAMpgMoc2gx6x2pVrRD55sOPpqlXCI59LciHsk-njdRcNwcfSN-d-Wp_tGTEaAszvEALw_wcB" target="_blank"&gt;https://www.confluent.io/resources/online-talk/innovate-faster-and-easier-with-confluent-and-databricks-on-azure/?utm_medium=sem&amp;amp;utm_source=google&amp;amp;utm_campaign=ch.sem_br.nonbrand_tp.prs_tgt.dsa_mt.dsa_rgn.india_lng.eng_dv.all_con.online-talks&amp;amp;utm_term=&amp;amp;creative=&amp;amp;device=c&amp;amp;placement=&amp;amp;gad=1&amp;amp;gclid=Cj0KCQjwho-lBhC_ARIsAMpgMoc2gx6x2pVrRD55sOPpqlXCI59LciHsk-njdRcNwcfSN-d-Wp_tGTEaAszvEALw_wcB&lt;/A&gt;&lt;/P&gt;</description>
    <pubDate>Tue, 04 Jul 2023 11:42:58 GMT</pubDate>
    <dc:creator>VaibB</dc:creator>
    <dc:date>2023-07-04T11:42:58Z</dc:date>
    <item>
      <title>Connecting confluent to databricks.</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-confluent-to-databricks/m-p/36768#M26194</link>
      <description>&lt;P&gt;Hi!!&lt;/P&gt;&lt;P&gt;Can someone tell me how to connect Confluent Cloud to Databricks?&amp;nbsp;I am new to this, so please elaborate on your answer.&lt;/P&gt;</description>
      <pubDate>Mon, 03 Jul 2023 12:46:45 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-confluent-to-databricks/m-p/36768#M26194</guid>
      <dc:creator>Mbinyala</dc:creator>
      <dc:date>2023-07-03T12:46:45Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting confluent to databricks.</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-confluent-to-databricks/m-p/36912#M26216</link>
      <description>&lt;P&gt;Here's a step-by-step guide to connecting Confluent Cloud to Databricks:&lt;/P&gt;
&lt;P&gt;Step 1: Set up a Confluent Cloud Cluster&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;Sign up for a Confluent Cloud account at &lt;A href="https://confluent.cloud/" target="_new"&gt;https://confluent.cloud/&lt;/A&gt;&amp;nbsp;&lt;SPAN&gt;and create a new cluster if you haven't already.&lt;/SPAN&gt;&lt;/LI&gt;
&lt;LI&gt;Once your cluster is ready, note down the following information:
&lt;UL&gt;
&lt;LI&gt;Bootstrap Servers (e.g., &lt;CODE&gt;kafka-brokers.example.com:9092&lt;/CODE&gt;)&lt;/LI&gt;
&lt;LI&gt;API Key and Secret (for authentication)
&lt;P&gt;Step 2: Configure Databricks&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;In your Databricks workspace, create a new notebook or open an existing one.&lt;/LI&gt;
&lt;LI&gt;Set up the necessary configuration using the Databricks secret scope feature. Secrets allow you to securely store sensitive information like credentials. To create a secret scope and add secrets, follow these steps:
&lt;UL&gt;
&lt;LI&gt;Open the notebook and run the following command to create a secret scope:&lt;/LI&gt;
&lt;LI&gt;
&lt;PRE&gt;dbutils.secrets.createScope(scopeName)&lt;/PRE&gt;
&lt;P&gt;&lt;SPAN&gt;Configure the Confluent Cloud secrets by adding the bootstrap servers, API Key, and Secret to the created secret scope:&lt;/SPAN&gt;&lt;/P&gt;
&lt;PRE&gt;dbutils.secrets.put(scope = scopeName, key = "kafka.bootstrap.servers", value = "kafka-brokers.example.com:9092")&lt;BR /&gt;dbutils.secrets.put(scope = scopeName, key = "kafka.security.protocol", value = "SASL_SSL")&lt;BR /&gt;dbutils.secrets.put(scope = scopeName, key = "kafka.sasl.mechanism", value = "PLAIN")&lt;BR /&gt;dbutils.secrets.put(scope = scopeName, key = "kafka.sasl.jaas.config", value = "org.apache.kafka.common.security.plain.PlainLoginModule required username=\"&amp;lt;API_KEY&amp;gt;\" password=\"&amp;lt;API_SECRET&amp;gt;\";")&lt;/PRE&gt;
&lt;P&gt;Step 3: Create a Streaming DataFrame in Databricks&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;In the same Databricks notebook, you can now create a Streaming DataFrame to consume data from Confluent Cloud. Here's an example code snippet:
&lt;DIV class=""&gt;
&lt;DIV class=""&gt;&lt;SPAN&gt;&lt;SPAN&gt;python&lt;/SPAN&gt;&lt;/SPAN&gt;
&lt;PRE class=""&gt;&lt;CODE class=""&gt;&lt;SPAN class=""&gt;from pyspark.sql.functions &lt;SPAN class=""&gt;import from_json, col
&lt;SPAN class=""&gt;from pyspark.sql.types &lt;SPAN class=""&gt;import StructType, StringType, DoubleType

&lt;SPAN class=""&gt;# Define the schema of the incoming data
schema = StructType().add(&lt;SPAN class=""&gt;"name", StringType()).add(&lt;SPAN class=""&gt;"age", DoubleType())

&lt;SPAN class=""&gt;# Read data from Kafka topic
kafka_bootstrap_servers = dbutils.secrets.get(scope = scopeName, key = &lt;SPAN class=""&gt;"kafka.bootstrap.servers")
df = spark \
  .readStream \
  .&lt;SPAN class=""&gt;format(&lt;SPAN class=""&gt;"kafka") \
  .option(&lt;SPAN class=""&gt;"kafka.bootstrap.servers", kafka_bootstrap_servers) \
  .option(&lt;SPAN class=""&gt;"subscribe", &lt;SPAN class=""&gt;"topic-name") \
  .option(&lt;SPAN class=""&gt;"startingOffsets", &lt;SPAN class=""&gt;"earliest") \
  .load()

&lt;SPAN class=""&gt;# Extract and process the data
processed_df = df \
  .select(from_json(col(&lt;SPAN class=""&gt;"value").cast(&lt;SPAN class=""&gt;"string"), schema).alias(&lt;SPAN class=""&gt;"data")) \
  .select(&lt;SPAN class=""&gt;"data.name", &lt;SPAN class=""&gt;"data.age")

&lt;SPAN class=""&gt;# Start the streaming query
query = processed_df.writeStream \
  .outputMode(&lt;SPAN class=""&gt;"append") \
  .&lt;SPAN class=""&gt;format(&lt;SPAN class=""&gt;"console") \
  .start()

query.awaitTermination()&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;Step 4: Customize the code as per your requirements&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;Modify the code snippet above to suit your specific use case. Update the schema definition, Kafka topic name, and any data transformations or output sinks as needed.&lt;/LI&gt;
&lt;/UL&gt;
&lt;PRE class=""&gt;&lt;CODE class=""&gt;&lt;/CODE&gt;&lt;/PRE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;/LI&gt;
&lt;/UL&gt;
&lt;P&gt;&lt;LI-WRAPPER&gt;&lt;/LI-WRAPPER&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;/LI&gt;
&lt;/UL&gt;
&lt;/LI&gt;
&lt;/UL&gt;
&lt;/LI&gt;
&lt;LI&gt;&lt;CODE&gt;&lt;/CODE&gt;&lt;/LI&gt;
&lt;/UL&gt;
&lt;/LI&gt;
&lt;/UL&gt;</description>
      <pubDate>Tue, 04 Jul 2023 09:02:13 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-confluent-to-databricks/m-p/36912#M26216</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-07-04T09:02:13Z</dc:date>
    </item>
    <item>
      <title>Re: Connecting confluent to databricks.</title>
      <link>https://community.databricks.com/t5/data-engineering/connecting-confluent-to-databricks/m-p/36927#M26222</link>
      <description>&lt;P&gt;You might want to watch this as well&amp;nbsp;&lt;A href="https://www.confluent.io/resources/online-talk/innovate-faster-and-easier-with-confluent-and-databricks-on-azure/?utm_medium=sem&amp;amp;utm_source=google&amp;amp;utm_campaign=ch.sem_br.nonbrand_tp.prs_tgt.dsa_mt.dsa_rgn.india_lng.eng_dv.all_con.online-talks&amp;amp;utm_term=&amp;amp;creative=&amp;amp;device=c&amp;amp;placement=&amp;amp;gad=1&amp;amp;gclid=Cj0KCQjwho-lBhC_ARIsAMpgMoc2gx6x2pVrRD55sOPpqlXCI59LciHsk-njdRcNwcfSN-d-Wp_tGTEaAszvEALw_wcB" target="_blank"&gt;https://www.confluent.io/resources/online-talk/innovate-faster-and-easier-with-confluent-and-databricks-on-azure/?utm_medium=sem&amp;amp;utm_source=google&amp;amp;utm_campaign=ch.sem_br.nonbrand_tp.prs_tgt.dsa_mt.dsa_rgn.india_lng.eng_dv.all_con.online-talks&amp;amp;utm_term=&amp;amp;creative=&amp;amp;device=c&amp;amp;placement=&amp;amp;gad=1&amp;amp;gclid=Cj0KCQjwho-lBhC_ARIsAMpgMoc2gx6x2pVrRD55sOPpqlXCI59LciHsk-njdRcNwcfSN-d-Wp_tGTEaAszvEALw_wcB&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 04 Jul 2023 11:42:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/connecting-confluent-to-databricks/m-p/36927#M26222</guid>
      <dc:creator>VaibB</dc:creator>
      <dc:date>2023-07-04T11:42:58Z</dc:date>
    </item>
  </channel>
</rss>

