<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: DatabricksSession and SparkConf in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/databrickssession-and-sparkconf/m-p/80522#M36054</link>
    <description>&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/113678"&gt;@fdeba&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;Maybe try to set them up after you acquire the Spark session, using&amp;nbsp;&lt;A href="https://kb.databricks.com/data/get-and-set-spark-config#set-spark-configuration-properties" target="_self" rel="nofollow noopener noreferrer"&gt;spark.conf.set&lt;/A&gt;, for example in a loop:&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;# Initialize the configuration for the Spark session
confSettings = [
    ("spark.sql.legacy.timeParserPolicy", "CORRECTED"),
    ("spark.sql.mapKeyDedupPolicy", "LAST_WIN"),
    ("spark.sql.legacy.parquet.nanosAsLong", "true"),
]


# Initialize a Spark session
spark = DatabricksSession.builder \
    .profile("&amp;lt;profile-name&amp;gt;") \
    .getOrCreate()

for conf in confSettings:
    spark.conf.set(conf[0], conf[1])&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Thu, 25 Jul 2024 10:36:47 GMT</pubDate>
    <dc:creator>szymon_dybczak</dc:creator>
    <dc:date>2024-07-25T10:36:47Z</dc:date>
    <item>
      <title>DatabricksSession and SparkConf</title>
      <link>https://community.databricks.com/t5/data-engineering/databrickssession-and-sparkconf/m-p/80517#M36049</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I want to initialize a Spark session using `DatabricksSession`. However, it seems not possible to call `.config()` and pass it a `SparkConf` instance. The following works:&lt;/P&gt;&lt;LI-CODE lang="python"&gt;# Initialize the configuration for the Spark session
confSettings = [
    ("spark.sql.legacy.timeParserPolicy", "CORRECTED"),
    ("spark.sql.mapKeyDedupPolicy", "LAST_WIN"),
    ("spark.sql.legacy.parquet.nanosAsLong", "true"),
]
conf = SparkConf() \
    .setMaster("local") \
    .setAll(confSettings) \
    .setExecutorEnv(confSettings)

# Initialize a Spark session
spark = SparkSession.builder \
    .config(conf=conf) \
    .getOrCreate()&lt;/LI-CODE&gt;&lt;DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;But the following throws an error:&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;```&lt;BR /&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;# Initialize the configuration for the Spark session&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;confSettings &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; [&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; (&lt;/SPAN&gt;&lt;SPAN&gt;"spark.sql.legacy.timeParserPolicy"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;"CORRECTED"&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; (&lt;/SPAN&gt;&lt;SPAN&gt;"spark.sql.mapKeyDedupPolicy"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;"LAST_WIN"&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; (&lt;/SPAN&gt;&lt;SPAN&gt;"spark.sql.legacy.parquet.nanosAsLong"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;"true"&lt;/SPAN&gt;&lt;SPAN&gt;),&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;]&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;conf &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt; &lt;SPAN&gt;SparkConf&lt;/SPAN&gt;&lt;SPAN&gt;() \&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; .&lt;/SPAN&gt;&lt;SPAN&gt;setMaster&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"local"&lt;/SPAN&gt;&lt;SPAN&gt;) \&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; .&lt;/SPAN&gt;&lt;SPAN&gt;setAll&lt;/SPAN&gt;&lt;SPAN&gt;(confSettings) \&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; .&lt;/SPAN&gt;&lt;SPAN&gt;setExecutorEnv&lt;/SPAN&gt;&lt;SPAN&gt;(confSettings)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;# Initialize a Spark session&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;spark &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; DatabricksSession.builder 
\&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; .&lt;/SPAN&gt;&lt;SPAN&gt;profile&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"&amp;lt;profile-name&amp;gt;"&lt;/SPAN&gt;&lt;SPAN&gt;) \&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; .&lt;/SPAN&gt;&lt;SPAN&gt;config&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;conf&lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt;conf) \ &amp;nbsp; &amp;nbsp;&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; .&lt;/SPAN&gt;&lt;SPAN&gt;getOrCreate&lt;/SPAN&gt;&lt;SPAN&gt;()&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;```&lt;BR /&gt;&lt;BR /&gt;Is there another way to set configuration for a Spark session when using `DatabricksSession`?&lt;BR /&gt;&lt;BR /&gt;Thanks.&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Thu, 25 Jul 2024 09:45:57 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databrickssession-and-sparkconf/m-p/80517#M36049</guid>
      <dc:creator>fdeba</dc:creator>
      <dc:date>2024-07-25T09:45:57Z</dc:date>
    </item>
    <item>
      <title>Re: DatabricksSession and SparkConf</title>
      <link>https://community.databricks.com/t5/data-engineering/databrickssession-and-sparkconf/m-p/80522#M36054</link>
      <description>&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/113678"&gt;@fdeba&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;Maybe try to set them up after you acquire the Spark session, using&amp;nbsp;&lt;A href="https://kb.databricks.com/data/get-and-set-spark-config#set-spark-configuration-properties" target="_self" rel="nofollow noopener noreferrer"&gt;spark.conf.set&lt;/A&gt;, for example in a loop:&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;# Initialize the configuration for the Spark session
confSettings = [
    ("spark.sql.legacy.timeParserPolicy", "CORRECTED"),
    ("spark.sql.mapKeyDedupPolicy", "LAST_WIN"),
    ("spark.sql.legacy.parquet.nanosAsLong", "true"),
]


# Initialize a Spark session
spark = DatabricksSession.builder \
    .profile("&amp;lt;profile-name&amp;gt;") \
    .getOrCreate()

for conf in confSettings:
    spark.conf.set(conf[0], conf[1])&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 25 Jul 2024 10:36:47 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databrickssession-and-sparkconf/m-p/80522#M36054</guid>
      <dc:creator>szymon_dybczak</dc:creator>
      <dc:date>2024-07-25T10:36:47Z</dc:date>
    </item>
    <item>
      <title>Re: DatabricksSession and SparkConf</title>
      <link>https://community.databricks.com/t5/data-engineering/databrickssession-and-sparkconf/m-p/80523#M36055</link>
      <description>&lt;P&gt;In almost all cases you don't need to create a new Spark session, as Databricks will do it for you automatically.&lt;/P&gt;&lt;P&gt;If it's only about Spark configurations, there are multiple ways to set them:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;A href="https://docs.databricks.com/en/compute/configure.html#spark-configuration" target="_self"&gt;Cluster settings&lt;/A&gt;&lt;/LI&gt;&lt;LI&gt;&lt;A href="https://kb.databricks.com/data/get-and-set-spark-config#set-spark-configuration-properties" target="_self"&gt;spark.conf.set&lt;/A&gt;&lt;/LI&gt;&lt;/UL&gt;</description>
      <pubDate>Thu, 25 Jul 2024 10:27:06 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databrickssession-and-sparkconf/m-p/80523#M36055</guid>
      <dc:creator>Witold</dc:creator>
      <dc:date>2024-07-25T10:27:06Z</dc:date>
    </item>
  </channel>
</rss>

