<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How can I start SparkSession out of Notebook? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/39450#M26985</link>
    <description>&lt;P&gt;Thank you for all replies.&lt;BR /&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/14792"&gt;@-werners-&lt;/a&gt;&amp;nbsp;I want to use Spark Session in modules which is called from Notebook.&lt;/P&gt;&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/86158"&gt;@sakhulaz&lt;/a&gt;&amp;nbsp;How can I get the config options to attach to the Databricks data?&lt;/P&gt;&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/39403"&gt;@Tharun-Kumar&lt;/a&gt;&amp;nbsp;Thank you. That approach definitely works for my situation!&lt;/P&gt;</description>
    <pubDate>Wed, 09 Aug 2023 14:55:37 GMT</pubDate>
    <dc:creator>NCat</dc:creator>
    <dc:date>2023-08-09T14:55:37Z</dc:date>
    <item>
      <title>How can I start SparkSession out of Notebook?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/39403#M26963</link>
      <description>&lt;P&gt;Hi community,&lt;/P&gt;&lt;P&gt;How can I start SparkSession out of Notebook?&lt;BR /&gt;I want to split my Notebook into small Python modules, and I want to let some of them call Spark functionality.&lt;/P&gt;</description>
      <pubDate>Tue, 08 Aug 2023 23:11:40 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/39403#M26963</guid>
      <dc:creator>NCat</dc:creator>
      <dc:date>2023-08-08T23:11:40Z</dc:date>
    </item>
    <item>
      <title>Re: How can I start SparkSession out of Notebook?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/39417#M26970</link>
      <description>&lt;P&gt;can you elaborate a bit more?&lt;BR /&gt;Are you going to call those modules in a notebook, and want to use spark functions in them?&lt;BR /&gt;Or do you want to explicitly start a separate sparksession for each module?&lt;/P&gt;</description>
      <pubDate>Wed, 09 Aug 2023 07:14:46 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/39417#M26970</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2023-08-09T07:14:46Z</dc:date>
    </item>
    <item>
      <title>Re: How can I start SparkSession out of Notebook?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/39433#M26975</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;To start a SparkSession outside of a notebook, you can follow these steps to split your code into small Python modules and utilize Spark functionality:&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;Import Required Libraries:&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;In your Python module, import the necessary libraries for Spark:&lt;/P&gt;&lt;LI-CODE lang="python"&gt;from pyspark.sql import SparkSession&lt;/LI-CODE&gt;&lt;OL start="2"&gt;&lt;LI&gt;Create SparkSession:&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;Initialize the SparkSession at the beginning of your module:&lt;/P&gt;&lt;LI-CODE lang="python"&gt;spark = SparkSession.builder \
    .appName("YourAppName") \
    .config("spark.some.config.option", "config-value") \
    .getOrCreate()&lt;/LI-CODE&gt;&lt;P&gt;Customize the configuration options as needed.&lt;/P&gt;</description>
      <pubDate>Wed, 09 Aug 2023 10:21:13 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/39433#M26975</guid>
      <dc:creator>sakhulaz</dc:creator>
      <dc:date>2023-08-09T10:21:13Z</dc:date>
    </item>
    <item>
      <title>Re: How can I start SparkSession out of Notebook?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/39438#M26977</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/85909"&gt;@NCat&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Databricks provides Spark Session out of the box. You have to just use the variable "spark".&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Screenshot 2023-08-09 at 5.52.07 PM.png" style="width: 999px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/3102iDDE879632F395FC7/image-size/large?v=v2&amp;amp;px=999" role="button" title="Screenshot 2023-08-09 at 5.52.07 PM.png" alt="Screenshot 2023-08-09 at 5.52.07 PM.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;In order to use it in other modules, you have to pass the spark variable as a parameter to the other modules.&lt;/P&gt;</description>
      <pubDate>Wed, 09 Aug 2023 12:22:56 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/39438#M26977</guid>
      <dc:creator>Tharun-Kumar</dc:creator>
      <dc:date>2023-08-09T12:22:56Z</dc:date>
    </item>
    <item>
      <title>Re: How can I start SparkSession out of Notebook?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/39450#M26985</link>
      <description>&lt;P&gt;Thank you for all replies.&lt;BR /&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/14792"&gt;@-werners-&lt;/a&gt;&amp;nbsp;I want to use Spark Session in modules which is called from Notebook.&lt;/P&gt;&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/86158"&gt;@sakhulaz&lt;/a&gt;&amp;nbsp;How can I get the config options to attach to the Databricks data?&lt;/P&gt;&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/39403"&gt;@Tharun-Kumar&lt;/a&gt;&amp;nbsp;Thank you. That approach definitely works for my situation!&lt;/P&gt;</description>
      <pubDate>Wed, 09 Aug 2023 14:55:37 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/39450#M26985</guid>
      <dc:creator>NCat</dc:creator>
      <dc:date>2023-08-09T14:55:37Z</dc:date>
    </item>
    <item>
      <title>Re: How can I start SparkSession out of Notebook?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/39452#M26986</link>
      <description>&lt;P&gt;in general (as already stated) a notebook automatically gets a sparksession.&lt;BR /&gt;You don't have to do anything.&lt;BR /&gt;If you specifically need to have separate sessions (isolation), you should run different notebooks (or plan different jobs) as these get a new session (a session per notebook/job).&lt;BR /&gt;Calling magic functions like %scala, %run etc use the same sparksession, so no isolation there.&lt;/P&gt;</description>
      <pubDate>Wed, 09 Aug 2023 15:03:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/39452#M26986</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2023-08-09T15:03:48Z</dc:date>
    </item>
    <item>
      <title>Re: How can I start SparkSession out of Notebook?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/76148#M35155</link>
      <description>&lt;P&gt;To start a SparkSession outside of a Jupyter Notebook and enable its use in multiple Python modules, follow these steps:&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;&lt;P data-unlink="true"&gt;&lt;STRONG&gt;Install Apache Spark&lt;/STRONG&gt;: Ensure Spark is installed on your system. You can download it from the Apache Spark website&amp;nbsp; and set it up with Hadoop or use a standalone cluster.&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;&lt;STRONG&gt;Set Up Environment Variables&lt;/STRONG&gt;: Configure the necessary environment variables (SPARK_HOME, JAVA_HOME, and PYTHONPATH) to point to the correct locations.&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;&lt;STRONG&gt;Create a Spark Configuration Module&lt;/STRONG&gt;: Create a Python file (e.g., spark_config.py) to set up the SparkSession:&lt;/P&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;python&lt;/SPAN&gt;&lt;DIV class=""&gt;&lt;SPAN class=""&gt;Copy code&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class=""&gt;&lt;SPAN class=""&gt;from&lt;/SPAN&gt; pyspark.sql &lt;SPAN class=""&gt;import&lt;/SPAN&gt; SparkSession &lt;SPAN class=""&gt;def&lt;/SPAN&gt; &lt;SPAN class=""&gt;create_spark_session&lt;/SPAN&gt;(&lt;SPAN class=""&gt;app_name=&lt;SPAN class=""&gt;"MyApp"&lt;/SPAN&gt;&lt;/SPAN&gt;): spark = SparkSession.builder \ .appName(app_name) \ .getOrCreate() &lt;SPAN class=""&gt;return&lt;/SPAN&gt; spark&lt;/DIV&gt;&lt;/DIV&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;&lt;STRONG&gt;Initialize SparkSession in Your Modules&lt;/STRONG&gt;: Import and use the create_spark_session function in your Python modules to get the SparkSession:&lt;/P&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;SPAN&gt;python&lt;/SPAN&gt;&lt;DIV class=""&gt;&lt;SPAN class=""&gt;Copy code&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class=""&gt;&lt;SPAN class=""&gt;from&lt;/SPAN&gt; spark_config &lt;SPAN class=""&gt;import&lt;/SPAN&gt; create_spark_session 
spark = create_spark_session(&lt;SPAN class=""&gt;"ModuleName"&lt;/SPAN&gt;) &lt;SPAN class=""&gt;# Now you can use Spark functionality, e.g.:&lt;/SPAN&gt; df = spark.read.csv(&lt;SPAN class=""&gt;"path/to/data.csv"&lt;/SPAN&gt;) df.show()&lt;/DIV&gt;&lt;/DIV&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;&lt;STRONG&gt;Run Your Modules&lt;/STRONG&gt;: Execute your Python scripts or modules from the command line or within a larger application, and the Spark session will be initialized and used as needed.&lt;/P&gt;&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;&lt;A href="https://takesurvery.com/" target="_self"&gt;benrich&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 29 Jun 2024 08:42:45 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/76148#M35155</guid>
      <dc:creator>benrich</dc:creator>
      <dc:date>2024-06-29T08:42:45Z</dc:date>
    </item>
    <item>
      <title>Re: How can I start SparkSession out of Notebook?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/76182#M35157</link>
      <description>&lt;P&gt;Just reuse the existing Databricks SparkSession.&lt;/P&gt;&lt;LI-CODE lang="python"&gt;from pyspark.sql import SparkSession
spark = SparkSession.getActiveSession()&lt;/LI-CODE&gt;</description>
      <pubDate>Sun, 30 Jun 2024 09:39:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-can-i-start-sparksession-out-of-notebook/m-p/76182#M35157</guid>
      <dc:creator>jacovangelder</dc:creator>
      <dc:date>2024-06-30T09:39:05Z</dc:date>
    </item>
  </channel>
</rss>

