<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic How to install python package on spark cluster in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/30667#M22257</link>
    <description>&lt;P&gt;&lt;/P&gt;
&lt;P&gt;Hi,&lt;/P&gt;
&lt;P&gt;How can I install Python packages on a Spark cluster? Locally, I can use pip install.&lt;/P&gt;
&lt;P&gt;I want to use some external packages which are not installed on the Spark cluster.&lt;/P&gt;
&lt;P&gt;Thanks for any suggestions.&lt;/P&gt; 
&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Tue, 14 Apr 2015 21:58:01 GMT</pubDate>
    <dc:creator>kidexp</dc:creator>
    <dc:date>2015-04-14T21:58:01Z</dc:date>
    <item>
      <title>How to install python package on spark cluster</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/30667#M22257</link>
      <description>&lt;P&gt;&lt;/P&gt;
&lt;P&gt;Hi,&lt;/P&gt;
&lt;P&gt;How can I install Python packages on a Spark cluster? Locally, I can use pip install.&lt;/P&gt;
&lt;P&gt;I want to use some external packages which are not installed on the Spark cluster.&lt;/P&gt;
&lt;P&gt;Thanks for any suggestions.&lt;/P&gt; 
&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 14 Apr 2015 21:58:01 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/30667#M22257</guid>
      <dc:creator>kidexp</dc:creator>
      <dc:date>2015-04-14T21:58:01Z</dc:date>
    </item>
    <item>
      <title>Re: How to install python package on spark cluster</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/30668#M22258</link>
      <description>&lt;P&gt;@kidexp​&amp;nbsp;&lt;/P&gt;&lt;P&gt;From the workspace dropdown, you can select New Library, and then select Python eggs or specify specific packages. Please see attached screenshots.&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="screen-shot-2015-04-14-at-30305-pm.png"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/2537i7E5B13D3166176E3/image-size/large?v=v2&amp;amp;px=999" role="button" title="screen-shot-2015-04-14-at-30305-pm.png" alt="screen-shot-2015-04-14-at-30305-pm.png" /&gt;&lt;/span&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="34-screen-shot-2015-04-14-at-30248-pm"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/2546i1217F20E0983FAC2/image-size/large?v=v2&amp;amp;px=999" role="button" title="34-screen-shot-2015-04-14-at-30248-pm" alt="34-screen-shot-2015-04-14-at-30248-pm" /&gt;&lt;/span&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 14 Apr 2015 22:05:56 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/30668#M22258</guid>
      <dc:creator>arsalan1</dc:creator>
      <dc:date>2015-04-14T22:05:56Z</dc:date>
    </item>
    <item>
      <title>Re: How to install python package on spark cluster</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/30669#M22259</link>
      <description>&lt;P&gt;Thanks very much @Arsalan Tavakoli-Shiraji​&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 14 Apr 2015 22:22:40 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/30669#M22259</guid>
      <dc:creator>kidexp</dc:creator>
      <dc:date>2015-04-14T22:22:40Z</dc:date>
    </item>
    <item>
      <title>Re: How to install python package on spark cluster</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/30670#M22260</link>
      <description>&lt;P&gt;@Arsalan Tavakoli-Shiraji​&amp;nbsp; how do we attach it to a specific cluster programmatically (and not just all clusters by checking that box)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 03 May 2017 01:37:51 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/30670#M22260</guid>
      <dc:creator>ChristianRodrig</dc:creator>
      <dc:date>2017-05-03T01:37:51Z</dc:date>
    </item>
    <item>
      <title>Re: How to install python package on spark cluster</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/30671#M22261</link>
      <description>&lt;P&gt;&lt;/P&gt;
&lt;P&gt;You can use the Databricks Libraries API to programmatically attach libraries to specific clusters. For more information: &lt;A href="https://docs.databricks.com/api/latest/libraries.html#install" target="_blank"&gt;https://docs.databricks.com/api/latest/libraries.html#install&lt;/A&gt;&lt;/P&gt; 
&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 01 Aug 2018 19:25:15 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/30671#M22261</guid>
      <dc:creator>dillon_bostwick</dc:creator>
      <dc:date>2018-08-01T19:25:15Z</dc:date>
    </item>
    <item>
      <title>Re: How to install python package on spark cluster</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/30672#M22262</link>
      <description>&lt;P&gt;Install a Python package on a Spark cluster&lt;/P&gt;&lt;P&gt;Create a virtualenv only for your Spark nodes.&lt;/P&gt;&lt;P&gt;Each time you run a Spark job, run a fresh pip install of all your own in-house Python libraries. ...&lt;/P&gt;&lt;P&gt;Zip up the site-packages dir of the virtualenv. ...&lt;/P&gt;&lt;P&gt;Pass the single .zip file, containing your libraries and their dependencies, as an argument to --py-files.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://blackbirdpackaging.com/" alt="https://blackbirdpackaging.com/" target="_blank"&gt;&lt;B&gt;Custom Boxes With Logo&lt;/B&gt;&lt;/A&gt;&lt;B&gt; | &lt;/B&gt;&lt;A href="https://blackbirdpackaging.com/product/custom-labels-and-stickers/" alt="https://blackbirdpackaging.com/product/custom-labels-and-stickers/" target="_blank"&gt;&lt;B&gt;Custom Labels and Stickers&lt;/B&gt;&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 22 Jun 2023 17:04:04 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/30672#M22262</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-06-22T17:04:04Z</dc:date>
    </item>
    <item>
      <title>Re: How to install python package on spark cluster</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/97167#M39447</link>
      <description>&lt;P&gt;&lt;STRONG&gt;Use --py-files with Spark Submit&lt;/STRONG&gt;: Zip the package and add it using --py-files when you run spark-submit. For example:&lt;/P&gt;&lt;P&gt;spark-submit --py-files path/to/your_package.zip your_script.py&lt;/P&gt;</description>
      <pubDate>Fri, 01 Nov 2024 06:49:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/97167#M39447</guid>
      <dc:creator>Mikejerere</dc:creator>
      <dc:date>2024-11-01T06:49:05Z</dc:date>
    </item>
    <item>
      <title>Re: How to install python package on spark cluster</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/97169#M39448</link>
      <description>&lt;P&gt;If --py-files doesn’t work, try this shorter method:&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;&lt;P&gt;&lt;STRONG&gt;Create a Conda Environment: Install your packages.&lt;/STRONG&gt;&lt;/P&gt;&lt;PRE&gt;conda create -n myenv python=3.x&lt;BR /&gt;conda activate myenv&lt;BR /&gt;pip install your-package&lt;/PRE&gt;&lt;P&gt;&lt;STRONG&gt;Package and Submit: Use conda-pack and spark-submit with --archives.&lt;/STRONG&gt;&lt;/P&gt;&lt;PRE&gt;conda pack -n myenv -o myenv.tar.gz&lt;BR /&gt;spark-submit --archives myenv.tar.gz#myenv --conf spark.pyspark.python=myenv/bin/python your_script.py&lt;BR /&gt;&lt;BR /&gt;&lt;/PRE&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;P&gt;This runs your Spark job with the required packages.&lt;/P&gt;&lt;P&gt;Regards,&lt;BR /&gt;&lt;A title="Summa Marketing" href="http://conda%20create -n myenv python=3.x conda activate myenv pip install your-package" target="_blank" rel="noopener"&gt;Summa Marketing&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/LI&gt;&lt;/OL&gt;</description>
      <pubDate>Fri, 01 Nov 2024 06:53:54 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-install-python-package-on-spark-cluster/m-p/97169#M39448</guid>
      <dc:creator>Mikejerere</dc:creator>
      <dc:date>2024-11-01T06:53:54Z</dc:date>
    </item>
  </channel>
</rss>

