<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Databricks Pub-Sub Data Recon in Get Started Discussions</title>
    <link>https://community.databricks.com/t5/get-started-discussions/databricks-pub-sub-data-recon/m-p/37608#M500</link>
    <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/33963"&gt;@Prabakar&lt;/a&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks for the quick reply. I am looking for a direct record count on Pub/Sub itself, not in Databricks, as we have to verify how many records were in Pub/Sub and how many records we received in Databricks in the last 24 hours.&lt;/P&gt;</description>
    <pubDate>Fri, 14 Jul 2023 05:57:15 GMT</pubDate>
    <dc:creator>Ajay-Pandey</dc:creator>
    <dc:date>2023-07-14T05:57:15Z</dc:date>
    <item>
      <title>Databricks Pub-Sub Data Recon</title>
      <link>https://community.databricks.com/t5/get-started-discussions/databricks-pub-sub-data-recon/m-p/37138#M413</link>
      <description>&lt;P&gt;I am trying to set up a recon activity between GCP Pub-Sub and Databricks. Is there any way to fetch the last 24 hours' record count from&amp;nbsp;Pub-Sub?&lt;/P&gt;&lt;P&gt;I tried but did not find any direct solution for it. It would be great if anyone could suggest a way to achieve it.&lt;/P&gt;&lt;P&gt;#pubsub #databricks&lt;/P&gt;</description>
      <pubDate>Fri, 07 Jul 2023 07:21:15 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/databricks-pub-sub-data-recon/m-p/37138#M413</guid>
      <dc:creator>Ajay-Pandey</dc:creator>
      <dc:date>2023-07-07T07:21:15Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks Pub-Sub Data Recon</title>
      <link>https://community.databricks.com/t5/get-started-discussions/databricks-pub-sub-data-recon/m-p/37197#M427</link>
      <description>&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;To fetch the last 24 hours' record count from Pub/Sub, you can use the&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;publishTimestampInMillis&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;field in the Pub/Sub schema to filter the records based on their publish timestamp. You can use the&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;current_timestamp()&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;function in Databricks to get the current timestamp and subtract 24 hours from it to get the timestamp for 24 hours ago. Then you can use the&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;filter()&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;function to filter the records based on their&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;publishTimestampInMillis&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;field.&lt;/P&gt;&lt;P&gt;Here's an example code snippet that demonstrates how to fetch the last 24 hours' record count from Pub/Sub using Databricks:&lt;/P&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="java"&gt;import org.apache.spark.sql.functions._

val authOptions: Map[String, String] =
 Map("clientId" -&amp;gt; clientId,
 "clientEmail" -&amp;gt; clientEmail,
 "privateKey" -&amp;gt; privateKey,
 "privateKeyId" -&amp;gt; privateKeyId)

val pubsubDF = spark.readStream
 .format("pubsub")
 .option("subscriptionId", "mysub")
 .option("topicId", "mytopic")
 .option("projectId", "myproject")
 .options(authOptions)
 .load()

val last24HoursTimestamp = current_timestamp() - expr("INTERVAL 24 HOURS")

val last24HoursCount = pubsubDF
  .filter(col("publishTimestampInMillis") &amp;gt;= last24HoursTimestamp.cast("long"))
  .count()

println(s"Last 24 hours record count: $last24HoursCount")&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Note that this code snippet assumes that you have already configured the Pub/Sub connector in Databricks and have the necessary authorization options. If you haven't done so, please refer to the documentation on&lt;SPAN&gt;&amp;nbsp;&lt;A href="https://docs.gcp.databricks.com/structured-streaming/pub-sub.html#subscribe-to-google-pubsub" target="_blank" rel="noopener"&gt;Subscribe to Google Pub/Sub | Databricks on Google Cloud&lt;/A&gt;&lt;/SPAN&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;for more information.&lt;/P&gt;</description>
      <pubDate>Fri, 07 Jul 2023 22:25:17 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/databricks-pub-sub-data-recon/m-p/37197#M427</guid>
      <dc:creator>Prabakar</dc:creator>
      <dc:date>2023-07-07T22:25:17Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks Pub-Sub Data Recon</title>
      <link>https://community.databricks.com/t5/get-started-discussions/databricks-pub-sub-data-recon/m-p/37468#M471</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/71565"&gt;@Ajay-Pandey&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Hope you are well. Just wanted to see if you were able to find an answer to your question and would you like to mark an answer as best? It would be really helpful for the other members too.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Cheers!&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 12 Jul 2023 09:37:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/databricks-pub-sub-data-recon/m-p/37468#M471</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-07-12T09:37:42Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks Pub-Sub Data Recon</title>
      <link>https://community.databricks.com/t5/get-started-discussions/databricks-pub-sub-data-recon/m-p/37608#M500</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/33963"&gt;@Prabakar&lt;/a&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks for the quick reply. I am looking for a direct record count on Pub/Sub itself, not in Databricks, as we have to verify how many records were in Pub/Sub and how many records we received in Databricks in the last 24 hours.&lt;/P&gt;</description>
      <pubDate>Fri, 14 Jul 2023 05:57:15 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/databricks-pub-sub-data-recon/m-p/37608#M500</guid>
      <dc:creator>Ajay-Pandey</dc:creator>
      <dc:date>2023-07-14T05:57:15Z</dc:date>
    </item>
  </channel>
</rss>

