<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Whitelisting GraphFrame Jar files does not work for shared compute. in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/whitelisting-graphframe-jar-files-does-not-work-for-shared/m-p/62992#M32146</link>
    <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;I'm encountering a Py4JSecurityException while using the GraphFrames jar library in a job task with shared compute. Despite following all documentation to whitelist my jar libraries in Volumes and ensuring compatibility with my Spark and Scala versions, the issue persists.&lt;/P&gt;&lt;P&gt;I've confirmed that my Volumes directory is in the Unity Catalog's allowlist and Table ACLs are not enabled.&lt;/P&gt;&lt;P&gt;Given the size of my input data (~500 million records), using a non-distributed package like NetworkX isn't feasible due to OutOfMemoryErrors. Disabling Py4JSecurityException isn't an option, and neither is continuing with single-user clusters.&lt;/P&gt;&lt;P&gt;Any guidance on this issue would be greatly appreciated.&lt;/P&gt;&lt;P&gt;Thanks!&lt;/P&gt;</description>
    <pubDate>Fri, 08 Mar 2024 06:45:39 GMT</pubDate>
    <dc:creator>spark_user1</dc:creator>
    <dc:date>2024-03-08T06:45:39Z</dc:date>
    <item>
      <title>Whitelisting GraphFrame Jar files does not work for shared compute.</title>
      <link>https://community.databricks.com/t5/data-engineering/whitelisting-graphframe-jar-files-does-not-work-for-shared/m-p/62992#M32146</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;I'm encountering a Py4JSecurityException while using the GraphFrames jar library in a job task with shared compute. Despite following all documentation to whitelist my jar libraries in Volumes and ensuring compatibility with my Spark and Scala versions, the issue persists.&lt;/P&gt;&lt;P&gt;I've confirmed that my Volumes directory is in the Unity Catalog's allowlist and Table ACLs are not enabled.&lt;/P&gt;&lt;P&gt;Given the size of my input data (~500 million records), using a non-distributed package like NetworkX isn't feasible due to OutOfMemoryErrors. Disabling Py4JSecurityException isn't an option, and neither is continuing with single-user clusters.&lt;/P&gt;&lt;P&gt;Any guidance on this issue would be greatly appreciated.&lt;/P&gt;&lt;P&gt;Thanks!&lt;/P&gt;</description>
      <pubDate>Fri, 08 Mar 2024 06:45:39 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/whitelisting-graphframe-jar-files-does-not-work-for-shared/m-p/62992#M32146</guid>
      <dc:creator>spark_user1</dc:creator>
      <dc:date>2024-03-08T06:45:39Z</dc:date>
    </item>
  </channel>
</rss>

