<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: I have to run the notebook in concurrently using process pool executor in python in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/i-have-to-run-the-notebook-in-concurrently-using-process-pool/m-p/67035#M33263</link>
    <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/103337"&gt;@ETLdeveloper&lt;/a&gt;&amp;nbsp;&lt;BR /&gt;You can use the multithreading that help you to run notebook in parallel.&lt;/P&gt;&lt;P&gt;Attaching code for your reference -&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;from concurrent.futures import ThreadPoolExecutor

class NotebookData:

  def __init__(self, path, timeout, parameters = None, retry = 0):

    self.path = path
    self.timeout = timeout
    self.parameters = parameters
    self.retry = retry

  def submit_notebook(notebook):
    # print("Running URL for Table : %s " % (notebook.parameters['url']))
    try:
      if (notebook.parameters):
        return dbutils.notebook.run(notebook.path, notebook.timeout, notebook.parameters)
      else:
        return dbutils.notebook.run(notebook.path, notebook.timeout)
    except Exception as e:
       if notebook.retry &amp;lt; 1:
        print("Failed For : ",notebook.parameters)
        raise
      
    # print("Retrying for : %s " % (notebook.parameters['url']))
    notebook.retry = notebook.retry - 1
    submit_notebook(notebook)

def parallel_notebooks(notebooks, parallel_thread):
    """
        If you create too many notebooks in parallel the driver may crash when you submit all of the jobs at once.
        This code limits the number of parallel notebooks.
    """
    with ThreadPoolExecutor(max_workers = parallel_thread) as ec:
        return [ec.submit(NotebookData.submit_notebook, notebook) for notebook in notebooks]


notebooks = [NotebookData(f"/Workspace/live/raw/test/abc", 3600, {"url" : f'{url}'}) for url in var_dict.values()]   

# #Array of instances of NotebookData Class
parallel_thread = 60

try : 
    res = parallel_notebooks(notebooks, parallel_thread)
    result = [i.result(timeout = 3600) for i in res] # This is a blocking call.
    print(result)  
except NameError as e :
    print(e)&lt;/LI-CODE&gt;</description>
    <pubDate>Tue, 23 Apr 2024 08:19:29 GMT</pubDate>
    <dc:creator>Ajay-Pandey</dc:creator>
    <dc:date>2024-04-23T08:19:29Z</dc:date>
    <item>
      <title>I have to run the notebook in concurrently using process pool executor in python</title>
      <link>https://community.databricks.com/t5/data-engineering/i-have-to-run-the-notebook-in-concurrently-using-process-pool/m-p/66922#M33233</link>
      <description>&lt;P&gt;Hello All,&lt;/P&gt;&lt;P&gt;My scenario required me to create a code that reads tables from the source catalog and writes them to the destination catalog using Spark. Doing one by one is not a good option when there are 300 tables in the catalog. So I am trying the process pool executor, which creates a separate process for each table and runs it concurrently as much as possible. When I created a code, it throws an error like "PythonUtils does not exist in the JVM". Essentially, I made two notebooks: one for the parent and one for the child. I'm going to share my code, could you advise me on how to solve and implement parallelism for my task?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;#parent notebook code&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; datetime &lt;/SPAN&gt;&lt;SPAN&gt;as&lt;/SPAN&gt;&lt;SPAN&gt; dt&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; concurrent.futures &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; ProcessPoolExecutor&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;def&lt;/SPAN&gt; &lt;SPAN&gt;execute_child_notebook&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;table_name&lt;/SPAN&gt;&lt;SPAN&gt;&lt;span class="lia-unicode-emoji" title=":disappointed_face:"&gt;😞&lt;/span&gt;&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;try&lt;/SPAN&gt;&lt;SPAN&gt;:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;# Get the notebook path&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; notebook_path &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt; &lt;SPAN&gt;"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;# Execute the child notebook using dbutils.notebook.run&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; result &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; dbutils.notebook.&lt;/SPAN&gt;&lt;SPAN&gt;run&lt;/SPAN&gt;&lt;SPAN&gt;(notebook_path, &lt;/SPAN&gt;&lt;SPAN&gt;1800&lt;/SPAN&gt;&lt;SPAN&gt;, {&lt;/SPAN&gt;&lt;SPAN&gt;"table_names"&lt;/SPAN&gt;&lt;SPAN&gt;: table_name})&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;return&lt;/SPAN&gt; &lt;SPAN&gt;f&lt;/SPAN&gt;&lt;SPAN&gt;"Execution result for &lt;/SPAN&gt;&lt;SPAN&gt;{&lt;/SPAN&gt;&lt;SPAN&gt;table_name&lt;/SPAN&gt;&lt;SPAN&gt;}&lt;/SPAN&gt;&lt;SPAN&gt;: &lt;/SPAN&gt;&lt;SPAN&gt;{&lt;/SPAN&gt;&lt;SPAN&gt;result&lt;/SPAN&gt;&lt;SPAN&gt;}&lt;/SPAN&gt;&lt;SPAN&gt;"&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;except&lt;/SPAN&gt; &lt;SPAN&gt;Exception&lt;/SPAN&gt; &lt;SPAN&gt;as&lt;/SPAN&gt;&lt;SPAN&gt; e:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;return&lt;/SPAN&gt; &lt;SPAN&gt;f&lt;/SPAN&gt;&lt;SPAN&gt;"Error executing notebook for &lt;/SPAN&gt;&lt;SPAN&gt;{&lt;/SPAN&gt;&lt;SPAN&gt;table_name&lt;/SPAN&gt;&lt;SPAN&gt;}&lt;/SPAN&gt;&lt;SPAN&gt;: &lt;/SPAN&gt;&lt;SPAN&gt;{&lt;/SPAN&gt;&lt;SPAN&gt;e&lt;/SPAN&gt;&lt;SPAN&gt;}&lt;/SPAN&gt;&lt;SPAN&gt;"&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;if&lt;/SPAN&gt; &lt;SPAN&gt;__name__&lt;/SPAN&gt; &lt;SPAN&gt;==&lt;/SPAN&gt; &lt;SPAN&gt;"__main__"&lt;/SPAN&gt;&lt;SPAN&gt;:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;# Define the list of table names to process&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; table_names &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; [&lt;/SPAN&gt;&lt;SPAN&gt;"table1"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;"table2"&lt;/SPAN&gt;&lt;SPAN&gt;]&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;try&lt;/SPAN&gt;&lt;SPAN&gt;:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;# Create a ProcessPoolExecutor with default number of workers&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;with&lt;/SPAN&gt; &lt;SPAN&gt;ProcessPoolExecutor&lt;/SPAN&gt;&lt;SPAN&gt;() &lt;/SPAN&gt;&lt;SPAN&gt;as&lt;/SPAN&gt;&lt;SPAN&gt; executor:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;# Map the execute_child_notebook function to process each table name in parallel&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; results &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; executor.&lt;/SPAN&gt;&lt;SPAN&gt;map&lt;/SPAN&gt;&lt;SPAN&gt;(execute_child_notebook, table_names)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;# Iterate over results and print them&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;for&lt;/SPAN&gt;&lt;SPAN&gt; result &lt;/SPAN&gt;&lt;SPAN&gt;in&lt;/SPAN&gt;&lt;SPAN&gt; results:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;print&lt;/SPAN&gt;&lt;SPAN&gt;(result)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;except&lt;/SPAN&gt; &lt;SPAN&gt;Exception&lt;/SPAN&gt; &lt;SPAN&gt;as&lt;/SPAN&gt;&lt;SPAN&gt; e:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;print&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;f&lt;/SPAN&gt;&lt;SPAN&gt;"Error: &lt;/SPAN&gt;&lt;SPAN&gt;{&lt;/SPAN&gt;&lt;SPAN&gt;e&lt;/SPAN&gt;&lt;SPAN&gt;}&lt;/SPAN&gt;&lt;SPAN&gt;"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;#Child notebook code .....................................................................................................................&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; pyspark.sql &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; SparkSession&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; datetime &lt;/SPAN&gt;&lt;SPAN&gt;as&lt;/SPAN&gt;&lt;SPAN&gt; dt&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;SPAN&gt; concurrent.futures &lt;/SPAN&gt;&lt;SPAN&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; ProcessPoolExecutor&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;table_name &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; dbutils.widgets.&lt;/SPAN&gt;&lt;SPAN&gt;get&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"table_name"&lt;/SPAN&gt;&lt;SPAN&gt;) &amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;# Get the table_name from a widget&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;def&lt;/SPAN&gt; &lt;SPAN&gt;read_write_spark_data&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;table_name&lt;/SPAN&gt;&lt;SPAN&gt;&lt;span class="lia-unicode-emoji" title=":disappointed_face:"&gt;😞&lt;/span&gt;&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;try&lt;/SPAN&gt;&lt;SPAN&gt;:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; spark &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; SparkSession.builder.&lt;/SPAN&gt;&lt;SPAN&gt;appName&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"MyJob"&lt;/SPAN&gt;&lt;SPAN&gt;).&lt;/SPAN&gt;&lt;SPAN&gt;getOrCreate&lt;/SPAN&gt;&lt;SPAN&gt;()&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;# Construct the fully qualified source table name&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; source_table &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt; &lt;SPAN&gt;f&lt;/SPAN&gt;&lt;SPAN&gt;"xxxxxxxxxxxxxxxxxxxxxxxxx.&lt;/SPAN&gt;&lt;SPAN&gt;{&lt;/SPAN&gt;&lt;SPAN&gt;table_name&lt;/SPAN&gt;&lt;SPAN&gt;}&lt;/SPAN&gt;&lt;SPAN&gt;"&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;# Verify if the source table exists&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;# Read data from the source table&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; df &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; spark.read.&lt;/SPAN&gt;&lt;SPAN&gt;table&lt;/SPAN&gt;&lt;SPAN&gt;(source_table).&lt;/SPAN&gt;&lt;SPAN&gt;limit&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;100&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;# Construct the fully qualified target table name&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; target_table &lt;/SPAN&gt;&lt;SPAN&gt;=&lt;/SPAN&gt; &lt;SPAN&gt;f&lt;/SPAN&gt;&lt;SPAN&gt;"yyyyyyyyyyyyyyyyyyyyyyyyy.&lt;/SPAN&gt;&lt;SPAN&gt;{&lt;/SPAN&gt;&lt;SPAN&gt;table_name&lt;/SPAN&gt;&lt;SPAN&gt;}&lt;/SPAN&gt;&lt;SPAN&gt;"&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;# Write data to the target table&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; df.write.&lt;/SPAN&gt;&lt;SPAN&gt;saveAsTable&lt;/SPAN&gt;&lt;SPAN&gt;(target_table)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;return&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;f&lt;/SPAN&gt;&lt;SPAN&gt;"Successfully read data from '&lt;/SPAN&gt;&lt;SPAN&gt;{&lt;/SPAN&gt;&lt;SPAN&gt;source_table&lt;/SPAN&gt;&lt;SPAN&gt;}&lt;/SPAN&gt;&lt;SPAN&gt;' and wrote to '&lt;/SPAN&gt;&lt;SPAN&gt;{&lt;/SPAN&gt;&lt;SPAN&gt;target_table&lt;/SPAN&gt;&lt;SPAN&gt;}&lt;/SPAN&gt;&lt;SPAN&gt;'."&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;except&lt;/SPAN&gt; &lt;SPAN&gt;Exception&lt;/SPAN&gt; &lt;SPAN&gt;as&lt;/SPAN&gt;&lt;SPAN&gt; e:&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;print&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;f&lt;/SPAN&gt;&lt;SPAN&gt;"Error: &lt;/SPAN&gt;&lt;SPAN&gt;{&lt;/SPAN&gt;&lt;SPAN&gt;e&lt;/SPAN&gt;&lt;SPAN&gt;}&lt;/SPAN&gt;&lt;SPAN&gt;"&lt;/SPAN&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;# Execute the read_write_spark_data function&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;read_write_spark_data&lt;/SPAN&gt;&lt;SPAN&gt;(table_name)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Mon, 22 Apr 2024 12:16:44 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/i-have-to-run-the-notebook-in-concurrently-using-process-pool/m-p/66922#M33233</guid>
      <dc:creator>ETLdeveloper</dc:creator>
      <dc:date>2024-04-22T12:16:44Z</dc:date>
    </item>
    <item>
      <title>Re: I have to run the notebook in concurrently using process pool executor in python</title>
      <link>https://community.databricks.com/t5/data-engineering/i-have-to-run-the-notebook-in-concurrently-using-process-pool/m-p/67035#M33263</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/103337"&gt;@ETLdeveloper&lt;/a&gt;&amp;nbsp;&lt;BR /&gt;You can use the multithreading that help you to run notebook in parallel.&lt;/P&gt;&lt;P&gt;Attaching code for your reference -&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;from concurrent.futures import ThreadPoolExecutor

class NotebookData:

  def __init__(self, path, timeout, parameters = None, retry = 0):

    self.path = path
    self.timeout = timeout
    self.parameters = parameters
    self.retry = retry

  def submit_notebook(notebook):
    # print("Running URL for Table : %s " % (notebook.parameters['url']))
    try:
      if (notebook.parameters):
        return dbutils.notebook.run(notebook.path, notebook.timeout, notebook.parameters)
      else:
        return dbutils.notebook.run(notebook.path, notebook.timeout)
    except Exception as e:
       if notebook.retry &amp;lt; 1:
        print("Failed For : ",notebook.parameters)
        raise
      
    # print("Retrying for : %s " % (notebook.parameters['url']))
    notebook.retry = notebook.retry - 1
    submit_notebook(notebook)

def parallel_notebooks(notebooks, parallel_thread):
    """
        If you create too many notebooks in parallel the driver may crash when you submit all of the jobs at once.
        This code limits the number of parallel notebooks.
    """
    with ThreadPoolExecutor(max_workers = parallel_thread) as ec:
        return [ec.submit(NotebookData.submit_notebook, notebook) for notebook in notebooks]


notebooks = [NotebookData(f"/Workspace/live/raw/test/abc", 3600, {"url" : f'{url}'}) for url in var_dict.values()]   

# #Array of instances of NotebookData Class
parallel_thread = 60

try : 
    res = parallel_notebooks(notebooks, parallel_thread)
    result = [i.result(timeout = 3600) for i in res] # This is a blocking call.
    print(result)  
except NameError as e :
    print(e)&lt;/LI-CODE&gt;</description>
      <pubDate>Tue, 23 Apr 2024 08:19:29 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/i-have-to-run-the-notebook-in-concurrently-using-process-pool/m-p/67035#M33263</guid>
      <dc:creator>Ajay-Pandey</dc:creator>
      <dc:date>2024-04-23T08:19:29Z</dc:date>
    </item>
  </channel>
</rss>

