<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: GCP auth time out in long running databricks job in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/gcp-auth-time-out-in-long-running-databricks-job/m-p/31837#M23191</link>
    <description>&lt;P&gt;Thanks, yes this seems to be the best work around - the good ole retry on fail. Thanks for the help.&lt;/P&gt;</description>
    <pubDate>Fri, 30 Sep 2022 17:42:13 GMT</pubDate>
    <dc:creator>elementalM</dc:creator>
    <dc:date>2022-09-30T17:42:13Z</dc:date>
    <item>
      <title>GCP auth time out in long running databricks job</title>
      <link>https://community.databricks.com/t5/data-engineering/gcp-auth-time-out-in-long-running-databricks-job/m-p/31832#M23186</link>
      <description>&lt;P&gt;I'm wondering if you can help me with a google&amp;nbsp;auth issue related to structured streaming and long running databricks jobs in general. I will get this error after running for 8+ hours. Any tips on this? GCP auth issues for long running jobs?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Caused by: java.net.UnknownHostException: &lt;A href="http://oauth2.googleapis.com/" alt="http://oauth2.googleapis.com/" target="_blank"&gt;oauth2.googleapis.com&lt;/A&gt;&lt;/P&gt;&lt;P&gt;	at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:184)&lt;/P&gt;&lt;P&gt;	at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)&lt;/P&gt;&lt;P&gt;	at java.net.Socket.connect(Socket.java:607)&lt;/P&gt;&lt;P&gt;	at sun.security.ssl.SSLSocketImpl.connect(SSLSocketImpl.java:288)&lt;/P&gt;&lt;P&gt;	at sun.net.NetworkClient.doConnect(NetworkClient.java:175)&lt;/P&gt;&lt;P&gt;	at sun.net.www.http.HttpClient.openServer(HttpClient.java:463)&lt;/P&gt;&lt;P&gt;	at sun.net.www.http.HttpClient.openServer(HttpClient.java:558)&lt;/P&gt;&lt;P&gt;	at sun.net.www.protocol.https.HttpsClient.&amp;lt;init&amp;gt;(HttpsClient.java:264)&lt;/P&gt;&lt;P&gt;	at sun.net.www.protocol.https.HttpsClient.New(HttpsClient.java:367)&lt;/P&gt;&lt;P&gt;	at sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.getNewHttpClient(AbstractDelegateHttpsURLConnection.java:203)&lt;/P&gt;&lt;P&gt;	at sun.net.www.protocol.http.HttpURLConnection.plainConnect0(HttpURLConnection.java:1162)&lt;/P&gt;&lt;P&gt;	at sun.net.www.protocol.http.HttpURLConnection.plainConnect(HttpURLConnection.java:1056)&lt;/P&gt;&lt;P&gt;	at sun.net.www.protocol.https.AbstractDelegateHttpsURLConnection.connect(AbstractDelegateHttpsURLConnection.java:189)&lt;/P&gt;&lt;P&gt;	at sun.net.www.protocol.http.HttpURLConnection.getOutputStream0(HttpURLConnection.java:1340)&lt;/P&gt;&lt;P&gt;	at sun.net.www.protocol.http.HttpURLConnection.getOutputStream(HttpURLConnection.java:1315)&lt;/P&gt;&lt;P&gt;	at 
sun.net.www.protocol.https.HttpsURLConnectionImpl.getOutputStream(HttpsURLConnectionImpl.java:264)&lt;/P&gt;&lt;P&gt;	at shaded.databricks.com.google.api.client.http.javanet.NetHttpRequest.execute(NetHttpRequest.java:113)&lt;/P&gt;&lt;P&gt;	at shaded.databricks.com.google.api.client.http.javanet.NetHttpRequest.execute(NetHttpRequest.java:84)&lt;/P&gt;&lt;P&gt;	at shaded.databricks.com.google.api.client.http.HttpRequest.execute(HttpRequest.java:1012)&lt;/P&gt;&lt;P&gt;	at shaded.databricks.com.google.api.client.auth.oauth2.TokenRequest.executeUnparsed(TokenRequest.java:322)&lt;/P&gt;&lt;P&gt;	at shaded.databricks.com.google.api.client.auth.oauth2.TokenRequest.execute(TokenRequest.java:346)&lt;/P&gt;&lt;P&gt;	at shaded.databricks.com.google.cloud.hadoop.util.CredentialFactory$GoogleCredentialWithRetry.executeRefreshToken(CredentialFactory.java:170)&lt;/P&gt;&lt;P&gt;	at shaded.databricks.com.google.api.client.auth.oauth2.Credential.refreshToken(Credential.java:494)&lt;/P&gt;&lt;P&gt;	at shaded.databricks.com.google.api.client.auth.oauth2.Credential.intercept(Credential.java:217)&lt;/P&gt;&lt;P&gt;	at shaded.databricks.com.google.api.client.http.HttpRequest.execute(HttpRequest.java:880)&lt;/P&gt;&lt;P&gt;	at shaded.databricks.com.google.api.client.googleapis.services.AbstractGoogleClientRequest.executeUnparsed(AbstractGoogleClientRequest.java:514)&lt;/P&gt;&lt;P&gt;	at shaded.databricks.com.google.api.client.googleapis.services.AbstractGoogleClientRequest.executeUnparsed(AbstractGoogleClientRequest.java:455)&lt;/P&gt;&lt;P&gt;	at shaded.databricks.com.google.api.client.googleapis.services.AbstractGoogleClientRequest.execute(AbstractGoogleClientRequest.java:565)&lt;/P&gt;&lt;P&gt;	at shaded.databricks.com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl.getObject(GoogleCloudStorageImpl.java:2038)&lt;/P&gt;&lt;P&gt;	... 
49 more&lt;/P&gt;&lt;P&gt;Driver stacktrace:&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:3029)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2976)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2970)&lt;/P&gt;&lt;P&gt;	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)&lt;/P&gt;&lt;P&gt;	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)&lt;/P&gt;&lt;P&gt;	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2970)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1390)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1390)&lt;/P&gt;&lt;P&gt;	at scala.Option.foreach(Option.scala:407)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1390)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3238)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3179)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3167)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:1152)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.SparkContext.runJobInternal(SparkContext.scala:2651)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2634)&lt;/P&gt;&lt;P&gt;	at 
org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:325)&lt;/P&gt;&lt;P&gt;	... 91 more&lt;/P&gt;&lt;P&gt;Caused by: com.databricks.sql.io.FileReadException: Error while reading file gs://em-blue-data/em-core-data/events/message_date=2022-09-13/part-00003-3f2affa0-0bd4-4e91-ab34-f22c57a2982b.c000.snappy.parquet.&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.logFileNameAndThrow(FileScanRDD.scala:521)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:494)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 13 Sep 2022 16:02:53 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/gcp-auth-time-out-in-long-running-databricks-job/m-p/31832#M23186</guid>
      <dc:creator>elementalM</dc:creator>
      <dc:date>2022-09-13T16:02:53Z</dc:date>
    </item>
    <item>
      <title>Re: GCP auth time out in long running databricks job</title>
      <link>https://community.databricks.com/t5/data-engineering/gcp-auth-time-out-in-long-running-databricks-job/m-p/31834#M23188</link>
      <description>&lt;P&gt;No, not at all. I just followed something along these lines: &lt;A href="https://docs.gcp.databricks.com/data/data-sources/google/gcs.html" target="_blank"&gt;https://docs.gcp.databricks.com/data/data-sources/google/gcs.html&lt;/A&gt;.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;It's not clear to me how to use this for structured streaming applications given the article you reference is geared for web applications.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Can you elaborate?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 15 Sep 2022 12:57:28 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/gcp-auth-time-out-in-long-running-databricks-job/m-p/31834#M23188</guid>
      <dc:creator>elementalM</dc:creator>
      <dc:date>2022-09-15T12:57:28Z</dc:date>
    </item>
    <item>
      <title>Re: GCP auth time out in long running databricks job</title>
      <link>https://community.databricks.com/t5/data-engineering/gcp-auth-time-out-in-long-running-databricks-job/m-p/31835#M23189</link>
      <description>&lt;P&gt;Hi @Dwight Branscombe​&amp;nbsp;I am wondering... are you using notebook workflows to stream your jobs? If so, take a look at this document &lt;A href="https://docs.gcp.databricks.com/structured-streaming/query-recovery.html#configure-structured-streaming-jobs-to-restart-streaming-queries-on-failure" alt="https://docs.gcp.databricks.com/structured-streaming/query-recovery.html#configure-structured-streaming-jobs-to-restart-streaming-queries-on-failure" target="_blank"&gt;here&lt;/A&gt;.&lt;/P&gt;</description>
      <pubDate>Fri, 16 Sep 2022 03:27:52 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/gcp-auth-time-out-in-long-running-databricks-job/m-p/31835#M23189</guid>
      <dc:creator>User16741082858</dc:creator>
      <dc:date>2022-09-16T03:27:52Z</dc:date>
    </item>
    <item>
      <title>Re: GCP auth time out in long running databricks job</title>
      <link>https://community.databricks.com/t5/data-engineering/gcp-auth-time-out-in-long-running-databricks-job/m-p/31836#M23190</link>
      <description>&lt;P&gt;Hi @Dwight Branscombe​&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Hope all is well! Just wanted to check in if you were able to resolve your issue and would you be happy to share the solution or mark an answer as best? Else please let us know if you need more help.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;We'd love to hear from you.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 27 Sep 2022 12:09:21 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/gcp-auth-time-out-in-long-running-databricks-job/m-p/31836#M23190</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2022-09-27T12:09:21Z</dc:date>
    </item>
    <item>
      <title>Re: GCP auth time out in long running databricks job</title>
      <link>https://community.databricks.com/t5/data-engineering/gcp-auth-time-out-in-long-running-databricks-job/m-p/31837#M23191</link>
      <description>&lt;P&gt;Thanks, yes this seems to be the best work around - the good ole retry on fail. Thanks for the help.&lt;/P&gt;</description>
      <pubDate>Fri, 30 Sep 2022 17:42:13 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/gcp-auth-time-out-in-long-running-databricks-job/m-p/31837#M23191</guid>
      <dc:creator>elementalM</dc:creator>
      <dc:date>2022-09-30T17:42:13Z</dc:date>
    </item>
    <item>
      <title>Re: GCP auth time out in long running databricks job</title>
      <link>https://community.databricks.com/t5/data-engineering/gcp-auth-time-out-in-long-running-databricks-job/m-p/31833#M23187</link>
      <description>&lt;P&gt;Hi, this can be an issue with oauth2; could you please check if these steps were followed? &lt;A href="https://developers.google.com/identity/protocols/oauth2/web-server" alt="https://developers.google.com/identity/protocols/oauth2/web-server" target="_blank"&gt;https://developers.google.com/identity/protocols/oauth2/web-server&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 15 Sep 2022 05:42:45 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/gcp-auth-time-out-in-long-running-databricks-job/m-p/31833#M23187</guid>
      <dc:creator>Debayan</dc:creator>
      <dc:date>2022-09-15T05:42:45Z</dc:date>
    </item>
  </channel>
</rss>

