<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Getting Spark &amp; Scala version in Cluster node initialization script in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33057#M24147</link>
    <description>&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/2229i2D3B7632347FA8AD/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
    <pubDate>Thu, 16 Dec 2021 18:22:08 GMT</pubDate>
    <dc:creator>Prabakar</dc:creator>
    <dc:date>2021-12-16T18:22:08Z</dc:date>
    <item>
      <title>Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33053#M24143</link>
      <description>&lt;P&gt;Hi there, &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I am developing a Cluster node initialization script (&lt;A href="https://docs.gcp.databricks.com/clusters/init-scripts.html#environment-variables" alt="https://docs.gcp.databricks.com/clusters/init-scripts.html#environment-variables" target="_blank"&gt;https://docs.gcp.databricks.com/clusters/init-scripts.html#environment-variables&lt;/A&gt;) in order to install some custom libraries.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Reading the docs of Databricks we can get some environment variables with data related to the current running cluster node. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;But I need to figure out what Spark &amp;amp; Scala version is currently being deployed. Is this possible?&lt;/P&gt;&lt;P&gt;Thanks in advance&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Regards&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 16 Dec 2021 10:11:09 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33053#M24143</guid>
      <dc:creator>ahuarte</dc:creator>
      <dc:date>2021-12-16T10:11:09Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33055#M24145</link>
      <description>&lt;P&gt;Hi Kaniz, thank you very much. For sure I will learn very much in this forum.&lt;/P&gt;</description>
      <pubDate>Thu, 16 Dec 2021 16:32:39 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33055#M24145</guid>
      <dc:creator>ahuarte</dc:creator>
      <dc:date>2021-12-16T16:32:39Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33056#M24146</link>
      <description>&lt;P&gt;Hi @A Huarte​&amp;nbsp;you can get the spark and scala version from the DBR that you will be using on the cluster.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/2222i7A91B52007188B48/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 16 Dec 2021 18:12:10 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33056#M24146</guid>
      <dc:creator>Prabakar</dc:creator>
      <dc:date>2021-12-16T18:12:10Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33057#M24147</link>
      <description>&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/2229i2D3B7632347FA8AD/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 16 Dec 2021 18:22:08 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33057#M24147</guid>
      <dc:creator>Prabakar</dc:creator>
      <dc:date>2021-12-16T18:22:08Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33058#M24148</link>
      <description>&lt;P&gt;Hi @Prabakar Ammeappin​&amp;nbsp;Thank you very much for your response, &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;but I mean how I can get this info in a script. I am trying to develop this sh init script for several Clusters with different Databricks runtimes. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I tried searching files in that script but I did not find any "*spark*.jar" file from which to extract the current version of the runtime (Spark &amp;amp; Scala version).&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;When the cluster is already started there are files with this pattern, but at the moment the init script is executed it seems that pyspark is not installed yet.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 16 Dec 2021 18:35:31 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33058#M24148</guid>
      <dc:creator>ahuarte</dc:creator>
      <dc:date>2021-12-16T18:35:31Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33059#M24149</link>
      <description>&lt;P&gt;I know that Databricks CLI tool is available, but it is not configured when the init script is running.&lt;/P&gt;</description>
      <pubDate>Thu, 16 Dec 2021 18:37:29 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33059#M24149</guid>
      <dc:creator>ahuarte</dc:creator>
      <dc:date>2021-12-16T18:37:29Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33060#M24150</link>
      <description>&lt;P&gt;Hm, this is a hacky idea, maybe there is a better way, but you could  &lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;ls /databricks/jars/spark*&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;and parse the results to get the version of Spark and Scala. You'll see files like spark--command--command-spark_3.1_2.12_deploy.jar containing the versions.&lt;/P&gt;</description>
      <pubDate>Fri, 17 Dec 2021 02:16:59 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33060#M24150</guid>
      <dc:creator>sean_owen</dc:creator>
      <dc:date>2021-12-17T02:16:59Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33061#M24151</link>
      <description>&lt;P&gt;Hi @Sean Owen​&amp;nbsp;thanks for your reply,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;your idea can work, but unfortunately there isn't any filename with the full version name. I am missing the minor part:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;yyyyyy_spark_3.2_2.12_xxxxx.jar -&amp;gt; Spark version is really 3.2.0&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I have configured databricks CLI to get metadata of the cluster and I get this output:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;{&lt;/P&gt;&lt;P&gt;  "cluster_id": "XXXXXXXXX",&lt;/P&gt;&lt;P&gt;  "spark_context_id": YYYYYYYYYYYY,&lt;/P&gt;&lt;P&gt;  "cluster_name": "Devel - Geospatial",&lt;/P&gt;&lt;P&gt;  "spark_version": "10.1.x-cpu-ml-scala2.12", ##&amp;lt;------!!!!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt; ....&lt;/P&gt;&lt;P&gt;}&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;"spark_version" property does not contain info about the spark version but about the DBR :-(, any thoughts?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks in advance&lt;/P&gt;&lt;P&gt;regards&lt;/P&gt;&lt;P&gt;Alvaro&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 17 Dec 2021 10:23:26 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33061#M24151</guid>
      <dc:creator>ahuarte</dc:creator>
      <dc:date>2021-12-17T10:23:26Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33062#M24152</link>
      <description>&lt;P&gt;Do you need such specific Spark version info, why? should not matter for user applications&lt;/P&gt;</description>
      <pubDate>Fri, 17 Dec 2021 13:16:03 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33062#M24152</guid>
      <dc:creator>sean_owen</dc:creator>
      <dc:date>2021-12-17T13:16:03Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33063#M24153</link>
      <description>&lt;P&gt;I am trying to install Geomesa, from: &lt;A href="https://mvnrepository.com/artifact/org.locationtech.geomesa/geomesa-gt-spark-runtime" alt="https://mvnrepository.com/artifact/org.locationtech.geomesa/geomesa-gt-spark-runtime" target="_blank"&gt;https://mvnrepository.com/artifact/org.locationtech.geomesa/geomesa-gt-spark-runtime&lt;/A&gt;&lt;/P&gt;&lt;P&gt;or&lt;/P&gt;&lt;P&gt;from:&lt;/P&gt;&lt;P&gt;&lt;A href="https://github.com/locationtech/geomesa/releases" target="_blank"&gt;https://github.com/locationtech/geomesa/releases&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I think I need the exact release.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 17 Dec 2021 13:31:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33063#M24153</guid>
      <dc:creator>ahuarte</dc:creator>
      <dc:date>2021-12-17T13:31:05Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33064#M24154</link>
      <description>&lt;P&gt;I doubt it's sensitive to a minor release, why?&lt;/P&gt;&lt;P&gt;But you also control what DBR/Spark version you launch the cluster with&lt;/P&gt;</description>
      <pubDate>Fri, 17 Dec 2021 14:32:32 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33064#M24154</guid>
      <dc:creator>sean_owen</dc:creator>
      <dc:date>2021-12-17T14:32:32Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33065#M24155</link>
      <description>&lt;P&gt;Many thanks @Sean Owen​&amp;nbsp;I am going to apply your advice, I am not going to write a generic init script that figures out everything, but a specific version of it for each Cluster type, really we only have 3 DBR types.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thank you very much for your support&lt;/P&gt;&lt;P&gt;Regards&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 17 Dec 2021 16:53:53 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33065#M24155</guid>
      <dc:creator>ahuarte</dc:creator>
      <dc:date>2021-12-17T16:53:53Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33066#M24156</link>
      <description>&lt;P&gt;@A Huarte​&amp;nbsp;- How did it go?&lt;/P&gt;</description>
      <pubDate>Mon, 27 Dec 2021 17:17:18 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33066#M24156</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2021-12-27T17:17:18Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33067#M24157</link>
      <description>&lt;P&gt;Hi, &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;My idea was to deploy Geomesa or Rasterframes on Databricks in order to provide spatial capabilities to this platform. Finally, according to some advice in Rasterframes Gitter chat I selected the DBR 9.0 where I am installing pyrasterframes 0.10.0 via "pip" and not getting any errors. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I hope this info can be of help.&lt;/P&gt;&lt;P&gt;Regards&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 28 Dec 2021 11:44:19 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33067#M24157</guid>
      <dc:creator>ahuarte</dc:creator>
      <dc:date>2021-12-28T11:44:19Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33068#M24158</link>
      <description>&lt;P&gt;Thank you so much! Would you be happy to mark whichever answer is best in your mind? That will help new members know which is the most effective.&lt;/P&gt;</description>
      <pubDate>Fri, 31 Dec 2021 16:10:01 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33068#M24158</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2021-12-31T16:10:01Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33069#M24159</link>
      <description>&lt;P&gt;You should be able to just pick the version that matches Spark and Scala from maven.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Here is a simple way to get the cluster Spark version&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/2224iEA2C57A4948CD2E2/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 27 Jan 2022 14:14:20 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33069#M24159</guid>
      <dc:creator>User16764241763</dc:creator>
      <dc:date>2022-01-27T14:14:20Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33070#M24160</link>
      <description>&lt;P&gt;The question is about an init script though&lt;/P&gt;</description>
      <pubDate>Thu, 27 Jan 2022 14:26:50 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33070#M24160</guid>
      <dc:creator>sean_owen</dc:creator>
      <dc:date>2022-01-27T14:26:50Z</dc:date>
    </item>
    <item>
      <title>Re: Getting Spark &amp; Scala version in Cluster node initialization script</title>
      <link>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33071#M24161</link>
      <description>&lt;P&gt;We can infer the cluster DBR version using the env $DATABRICKS_RUNTIME_VERSION. (For the exact spark/scala version mapping, you can refer to the specific &lt;A href="https://docs.databricks.com/release-notes/runtime/releases.html" alt="https://docs.databricks.com/release-notes/runtime/releases.html" target="_blank"&gt;DBR release notes&lt;/A&gt;)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Sample usage inside a init script, &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;DBR_10_4_VERSION="10.4"
if [[ "$DATABRICKS_RUNTIME_VERSION" == "$DBR_10_4_VERSION"* ]]; then
  echo "running 10.4 specific commands"
else
  echo "Skipping 10.4 specific commands"
fi&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 14 Sep 2022 16:02:25 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/getting-spark-scala-version-in-cluster-node-initialization/m-p/33071#M24161</guid>
      <dc:creator>Lingesh</dc:creator>
      <dc:date>2022-09-14T16:02:25Z</dc:date>
    </item>
  </channel>
</rss>

