Hi,
Been working on some parallel notebook code, which I have ported to python from the example on the DB website and added some exception handling and that works fine. What I would like to do is paramterise the input but am not succeeding as the function does not seem to accept that what I am passing in is a list, even though if I check the type of the variable it is indeed a list
from concurrent.futures import ThreadPoolExecutor
class NotebookData:
def __init__(self, path, timeout, parameters=None, retry=0):
self.path = path
self.timeout = timeout
self.parameters = parameters
self.retry = retry
def submitNotebook(notebook):
print("Running notebook %s" % notebook.path)
try:
if (notebook.parameters):
return dbutils.notebook.run(notebook.path, notebook.timeout, notebook.parameters)
else:
return dbutils.notebook.run(notebook.path, notebook.timeout)
except Exception:
if notebook.retry < 1:
raise
print("Retrying notebook %s" % notebook.path)
notebook.retry = notebook.retry - 1
submitNotebook(notebook)
def parallelNotebooks(notebooks, numInParallel):
# If you create too many notebooks in parallel the driver may crash when you submit all of the jobs at once.
# This code limits the number of parallel notebooks.
with ThreadPoolExecutor(max_workers=numInParallel) as ec:
return [ec.submit(NotebookData.submitNotebook, notebook) for notebook in notebooks]
notebooks = noteBookConfig
#[NotebookData("Sub1", 1200), NotebookData("Sub2", 1200, retry=2)]
print (type(notebooks))
print(notebooks)
res = NotebookData.parallelNotebooks(notebooks, 2)
result = [f.result(timeout=3600) for f in res] # This is a blocking call.
print(result)
The commented out line (31) is the format of the list, noteBookConfig is the list I am passing in. That is created like this:
import pandas as pd
dbutils.widgets.text("configs", "", "Notebook Config")
noteBookConfig= dbutils.widgets.get("configs")
noteBookConfig = list(noteBookConfig.split(" "))
print (noteBookConfig)
print (type(noteBookConfig))
Any help greatly appreciated.