<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Databricks Clusters on GCP stop working "Environment directory not found" issue - waitForEnvironmentFileSystem in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/databricks-clusters-on-gcp-stop-working-quot-environment/m-p/20230#M13632</link>
    <description>&lt;P&gt;Starting from yesterday 17/5/2022 i start getting errors while running notebooks or jobs on clusters of Databricks GCP. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;U&gt;The error is: &lt;/U&gt;&lt;/P&gt;&lt;P&gt;SparkException: Environment directory not found at /local_disk0/.ephemeral_nfs/cluster_libraries/python&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;The job/notebooks can do some of the operations but some of the operations like: &lt;/P&gt;&lt;P&gt;display(dbutils.fs.ls("/%s" % mount_name))&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I tried to start a new cluster. I tried to reduce any init scripts. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;U&gt;The full error: &lt;/U&gt;&lt;/P&gt;&lt;P&gt;22/05/18 05:30:09 WARN TaskSetManager: Lost task 3.0 in stage 0.0 (TID 3) (10.71.1.3 executor 0): org.apache.spark.SparkException: Environment directory not found at /local_disk0/.ephemeral_nfs/cluster_libraries/python&lt;/P&gt;&lt;P&gt;	at org.apache.spark.util.DatabricksUtils$.waitForEnvironmentFileSystem(DatabricksUtils.scala:685)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.api.python.PythonWorkerFactory.$anonfun$startDaemon$1(PythonWorkerFactory.scala:273)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.api.python.PythonWorkerFactory.$anonfun$startDaemon$1$adapted(PythonWorkerFactory.scala:273)&lt;/P&gt;&lt;P&gt;	at scala.Option.foreach(Option.scala:407)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.api.python.PythonWorkerFactory.startDaemon(PythonWorkerFactory.scala:273)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.api.python.PythonWorkerFactory.createThroughDaemon(PythonWorkerFactory.scala:185)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.api.python.PythonWorkerFactory.create(PythonWorkerFactory.scala:134)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.SparkEnv.createPythonWorker(SparkEnv.scala:209)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.api.python.BasePythonRunner.compute(PythonRunner.scala:251)&lt;/P&gt;&lt;P&gt;	at 
org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:77)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:380)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.iterator(RDD.scala:344)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:380)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.iterator(RDD.scala:344)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:380)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.iterator(RDD.scala:344)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:380)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.iterator(RDD.scala:344)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.SQLExecutionRDD.$anonfun$compute$1(SQLExecutionRDD.scala:57)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.internal.SQLConf$.withExistingConf(SQLConf.scala:170)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:57)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:380)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.iterator(RDD.scala:344)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:380)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.iterator(RDD.scala:344)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$3(ResultTask.scala:75)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/P&gt;&lt;P&gt;	at 
org.apache.spark.scheduler.ResultTask.$anonfun$runTask$1(ResultTask.scala:75)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:55)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.Task.doRunTask(Task.scala:156)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:125)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.Task.run(Task.scala:95)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$13(Executor.scala:826)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1670)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:829)&lt;/P&gt;&lt;P&gt;	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:684)&lt;/P&gt;&lt;P&gt;	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)&lt;/P&gt;&lt;P&gt;	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)&lt;/P&gt;&lt;P&gt;	at java.lang.Thread.run(Thread.java:748)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Wed, 18 May 2022 16:45:12 GMT</pubDate>
    <dc:creator>720677</dc:creator>
    <dc:date>2022-05-18T16:45:12Z</dc:date>
    <item>
      <title>Databricks Clusters on GCP stop working "Environment directory not found" issue - waitForEnvironmentFileSystem</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-clusters-on-gcp-stop-working-quot-environment/m-p/20230#M13632</link>
      <description>&lt;P&gt;Starting from yesterday 17/5/2022 i start getting errors while running notebooks or jobs on clusters of Databricks GCP. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;U&gt;The error is: &lt;/U&gt;&lt;/P&gt;&lt;P&gt;SparkException: Environment directory not found at /local_disk0/.ephemeral_nfs/cluster_libraries/python&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;The job/notebooks can do some of the operations but some of the operations like: &lt;/P&gt;&lt;P&gt;display(dbutils.fs.ls("/%s" % mount_name))&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I tried to start a new cluster. I tried to reduce any init scripts. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;U&gt;The full error: &lt;/U&gt;&lt;/P&gt;&lt;P&gt;22/05/18 05:30:09 WARN TaskSetManager: Lost task 3.0 in stage 0.0 (TID 3) (10.71.1.3 executor 0): org.apache.spark.SparkException: Environment directory not found at /local_disk0/.ephemeral_nfs/cluster_libraries/python&lt;/P&gt;&lt;P&gt;	at org.apache.spark.util.DatabricksUtils$.waitForEnvironmentFileSystem(DatabricksUtils.scala:685)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.api.python.PythonWorkerFactory.$anonfun$startDaemon$1(PythonWorkerFactory.scala:273)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.api.python.PythonWorkerFactory.$anonfun$startDaemon$1$adapted(PythonWorkerFactory.scala:273)&lt;/P&gt;&lt;P&gt;	at scala.Option.foreach(Option.scala:407)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.api.python.PythonWorkerFactory.startDaemon(PythonWorkerFactory.scala:273)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.api.python.PythonWorkerFactory.createThroughDaemon(PythonWorkerFactory.scala:185)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.api.python.PythonWorkerFactory.create(PythonWorkerFactory.scala:134)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.SparkEnv.createPythonWorker(SparkEnv.scala:209)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.api.python.BasePythonRunner.compute(PythonRunner.scala:251)&lt;/P&gt;&lt;P&gt;	at 
org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:77)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:380)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.iterator(RDD.scala:344)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:380)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.iterator(RDD.scala:344)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:380)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.iterator(RDD.scala:344)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:380)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.iterator(RDD.scala:344)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.SQLExecutionRDD.$anonfun$compute$1(SQLExecutionRDD.scala:57)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.internal.SQLConf$.withExistingConf(SQLConf.scala:170)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.SQLExecutionRDD.compute(SQLExecutionRDD.scala:57)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:380)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.iterator(RDD.scala:344)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:380)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.rdd.RDD.iterator(RDD.scala:344)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$3(ResultTask.scala:75)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/P&gt;&lt;P&gt;	at 
org.apache.spark.scheduler.ResultTask.$anonfun$runTask$1(ResultTask.scala:75)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:55)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.Task.doRunTask(Task.scala:156)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:125)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.scheduler.Task.run(Task.scala:95)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$13(Executor.scala:826)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1670)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:829)&lt;/P&gt;&lt;P&gt;	at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)&lt;/P&gt;&lt;P&gt;	at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:684)&lt;/P&gt;&lt;P&gt;	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)&lt;/P&gt;&lt;P&gt;	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)&lt;/P&gt;&lt;P&gt;	at java.lang.Thread.run(Thread.java:748)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 18 May 2022 16:45:12 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-clusters-on-gcp-stop-working-quot-environment/m-p/20230#M13632</guid>
      <dc:creator>720677</dc:creator>
      <dc:date>2022-05-18T16:45:12Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks Clusters on GCP stop working "Environment directory not found" issue - waitForEnvironmentFileSystem</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-clusters-on-gcp-stop-working-quot-environment/m-p/20232#M13634</link>
      <description>&lt;P&gt;Databricks support detected an issue with the NFS mounts on GCP. &lt;/P&gt;&lt;P&gt;Looks like DBR 10.X versions were affected. &lt;/P&gt;&lt;P&gt;After several hours they fixed it and now the same clusters are back to normal. &lt;/P&gt;</description>
      <pubDate>Thu, 19 May 2022 07:15:53 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-clusters-on-gcp-stop-working-quot-environment/m-p/20232#M13634</guid>
      <dc:creator>720677</dc:creator>
      <dc:date>2022-05-19T07:15:53Z</dc:date>
    </item>
  </channel>
</rss>

