<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Can python futures utilise all cluster nodes ? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/can-python-futures-utilise-all-cluster-nodes/m-p/15723#M10016</link>
    <description>&lt;P&gt;You can create an init script and then add it during cluster startup.&lt;/P&gt;</description>
    <pubDate>Wed, 29 Jun 2022 16:44:17 GMT</pubDate>
    <dc:creator>Keyuri</dc:creator>
    <dc:date>2022-06-29T16:44:17Z</dc:date>
    <item>
      <title>Can python futures utilise all cluster nodes ?</title>
      <link>https://community.databricks.com/t5/data-engineering/can-python-futures-utilise-all-cluster-nodes/m-p/15721#M10014</link>
      <description>&lt;P&gt;I used Python futures to call a function multiple times concurrently; however, I am not sure if all nodes are utilised or how to make sure it uses all cluster nodes.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Can you confirm: if I create a cluster with 5 workers, each with 8 cores, for example, does that mean I can run 5 x 8 concurrent tasks ? &lt;/P&gt;&lt;P&gt;Or will the futures and Python use only the main node for each task ?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Code example:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;# assuming 5 workers each with 8 cores
executor = ProcessPoolExecutor(5 * 8)
&amp;nbsp;
def tester():
   # code to run any parallel task
   return result
&amp;nbsp;
for index in range(10000):
   executor.submit(tester)&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;B&gt;In other words, can Python futures or any Python threading library use all cluster workers' CPUs ? &lt;/B&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 29 Jun 2022 09:51:47 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/can-python-futures-utilise-all-cluster-nodes/m-p/15721#M10014</guid>
      <dc:creator>Alex0101</dc:creator>
      <dc:date>2022-06-29T09:51:47Z</dc:date>
    </item>
    <item>
      <title>Re: Can python futures utilise all cluster nodes ?</title>
      <link>https://community.databricks.com/t5/data-engineering/can-python-futures-utilise-all-cluster-nodes/m-p/15722#M10015</link>
      <description>&lt;P&gt;It will run on the main node (driver) only.&lt;/P&gt;&lt;P&gt;You need some kind of cluster management framework to distribute the work over the nodes, like Yarn, Spark, Dask, Ray, etc.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;If you use PySpark, then you can leverage the parallel processing of Spark and it will indeed run over multiple nodes, if your function uses Spark.&lt;/P&gt;</description>
      <pubDate>Wed, 29 Jun 2022 10:36:57 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/can-python-futures-utilise-all-cluster-nodes/m-p/15722#M10015</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2022-06-29T10:36:57Z</dc:date>
    </item>
    <item>
      <title>Re: Can python futures utilise all cluster nodes ?</title>
      <link>https://community.databricks.com/t5/data-engineering/can-python-futures-utilise-all-cluster-nodes/m-p/15723#M10016</link>
      <description>&lt;P&gt;You can create an init script and then add it during cluster startup.&lt;/P&gt;</description>
      <pubDate>Wed, 29 Jun 2022 16:44:17 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/can-python-futures-utilise-all-cluster-nodes/m-p/15723#M10016</guid>
      <dc:creator>Keyuri</dc:creator>
      <dc:date>2022-06-29T16:44:17Z</dc:date>
    </item>
    <item>
      <title>Re: Can python futures utilise all cluster nodes ?</title>
      <link>https://community.databricks.com/t5/data-engineering/can-python-futures-utilise-all-cluster-nodes/m-p/15724#M10017</link>
      <description>&lt;P&gt;Can you elaborate on which init script to add to the cluster ?&lt;/P&gt;</description>
      <pubDate>Wed, 29 Jun 2022 17:52:22 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/can-python-futures-utilise-all-cluster-nodes/m-p/15724#M10017</guid>
      <dc:creator>Alex0101</dc:creator>
      <dc:date>2022-06-29T17:52:22Z</dc:date>
    </item>
  </channel>
</rss>

