ModuleNotFoundError: No module named 'mlflow' when running a notebook

bluetail
Contributor

I am running a notebook on the Coursera platform.

My configuration file, Classroom-Setup, looks like this:

%python
 
# Record the course module name and the Databricks Runtime version string in the Spark conf.
# NOTE(review): these keys are presumably read by the shared training setup scripts — confirm.
spark.conf.set("com.databricks.training.module-name", "deep-learning")
spark.conf.set("com.databricks.training.expected-dbr", "6.4")
 
# Set the "suppress.*" flags to "true". The username and workingDir flags are
# left unset (their lines are commented out below).
# NOTE(review): presumably each flag turns off the matching part of the setup — verify.
spark.conf.set("com.databricks.training.suppress.untilStreamIsReady", "true")
spark.conf.set("com.databricks.training.suppress.stopAllStreams", "true")
spark.conf.set("com.databricks.training.suppress.moduleName", "true")
spark.conf.set("com.databricks.training.suppress.lessonName", "true")
# spark.conf.set("com.databricks.training.suppress.username", "true")
spark.conf.set("com.databricks.training.suppress.userhome", "true")
# spark.conf.set("com.databricks.training.suppress.workingDir", "true")
spark.conf.set("com.databricks.training.suppress.databaseName", "true")
 
import warnings
# Install a blanket "ignore" filter: no category or module is given, so it
# applies to every warning raised after this point.
warnings.filterwarnings("ignore")
 
#import tensorflow
 
def display_run_uri(experiment_id, run_id):
    """Render a clickable link to an MLflow run in the notebook output.

    The host name is read from the "browserHostName" tag of the current
    notebook context, combined with the experiment and run ids into a
    ``#mlflow/experiments/.../runs/...`` URI, and passed to ``displayHTML``
    as an anchor whose text is the URI itself.

    Args:
        experiment_id: Id of the experiment, inserted into the URI as-is.
        run_id: Id of the run, inserted into the URI as-is.
    """
    context_tags = dbutils.notebook.entry_point.getDbutils().notebook().getContext().tags()
    browser_host = context_tags.get("browserHostName").get()

    uri_template = "https://{}/#mlflow/experiments/{}/runs/{}"
    run_uri = uri_template.format(browser_host, experiment_id, run_id)

    # The URI is used twice: once as the href and once as the visible link text.
    link_markup = """<b>Run URI:</b> <a href="{}">{}</a>""".format(run_uri, run_uri)
    displayHTML(link_markup)
 
def waitForMLflow():
    """Block until the ``mlflow`` module is importable, then report its status.

    If the first import fails, a waiting message is printed and the import is
    retried, sleeping one second and printing a ``.`` after each failed
    attempt. The loop only ends when the import succeeds, so this call never
    returns if the library is never installed on the cluster.

    Once the module is importable, its ``__version__`` is compared against
    the 1.2 minimum and either the "ready" or the "version too old" message
    is printed. The check runs on both the immediate and the waited-for path.

    Returns:
        None. All results are reported through ``print``.

    Raises:
        ValueError: If the major or minor component of ``mlflow.__version__``
            is not a plain integer.
    """
    import importlib
    import time

    try:
        import mlflow
    except ModuleNotFoundError:
        print("""The module "mlflow" is not yet attached to the cluster, waiting...""")
        while True:
            # The import system caches directory listings; drop them so a
            # library installed while we are waiting is actually discovered.
            importlib.invalidate_caches()
            try:
                import mlflow
                break
            except ModuleNotFoundError:
                time.sleep(1)
                print(".", end="")

    # Compare (major, minor) as a tuple. Looking at the minor component alone
    # would reject 2.0.x and accept 0.9.x.
    major_minor = tuple(int(part) for part in mlflow.__version__.split(".")[:2])
    if major_minor >= (1, 2):
        print("""The module "mlflow" is attached and ready to go.""")
    else:
        print("""You need MLflow version 1.2.0+ installed.""")
 
 
from sklearn.metrics import confusion_matrix,f1_score,accuracy_score,fbeta_score,precision_score,recall_score
import matplotlib.pyplot as plt
import numpy as np
from sklearn.utils.multiclass import unique_labels
 
def plot_confusion_matrix(y_true, y_pred, classes,
                          title=None,
                          cmap=plt.cm.Blues):
    """Plot the confusion matrix of ``y_pred`` against ``y_true`` as a heatmap.

    Args:
        y_true: True labels, passed to ``confusion_matrix``.
        y_pred: Predicted labels, passed to ``confusion_matrix``.
        classes: Tick labels used on both the x and the y axis.
        title: Optional title for the axes.
        cmap: Colormap handed to ``imshow``.

    Returns:
        The matplotlib figure holding the heatmap, colorbar and cell counts.
    """
    matrix = confusion_matrix(y_true, y_pred)
    n_rows = matrix.shape[0]
    n_cols = matrix.shape[1]

    figure, axes = plt.subplots()
    heatmap = axes.imshow(matrix, interpolation='nearest', cmap=cmap)
    axes.figure.colorbar(heatmap, ax=axes)

    # One tick per matrix row/column, labelled with the class names.
    axes.set(xticks=np.arange(n_cols),
             yticks=np.arange(n_rows),
             xticklabels=classes, yticklabels=classes,
             title=title,
             ylabel='True label',
             xlabel='Predicted label')

    # Slant the x tick labels.
    plt.setp(axes.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Write each count into its cell; cells above half the maximum get white
    # text, the rest black.
    half_max = matrix.max() / 2.
    for row, col in np.ndindex(n_rows, n_cols):
        count = matrix[row, col]
        text_color = "white" if count > half_max else "black"
        axes.text(col, row, format(count, 'd'),
                  ha="center", va="center",
                  color=text_color)

    figure.tight_layout()
    return figure
 
# Print NumPy floating-point values with 2 digits of precision.
np.set_printoptions(precision=2)
 
# Emit a status message through displayHTML once the definitions above have run.
displayHTML("Preparing the learning environment...")

I have no issues running this command,

%run "./Includes/Classroom-Setup", as the output says that all the functions have been defined.

Then, when I run this,

%python

import mlflow

import mlflow.spark

in the next cell, I get a ModuleNotFoundError:

ModuleNotFoundError                       Traceback (most recent call last)
<command-1419217929106651> in <module>
----> 1 import mlflow
      2 import mlflow.spark
 
/databricks/python_shell/dbruntime/PythonPackageImportsInstrumentation/__init__.py in import_patch(name, globals, locals, fromlist, level)
    156             # Import the desired module. If you’re seeing this while debugging a failed import,
    157             # look at preceding stack frames for relevant error information.
--> 158             original_result = python_builtin_import(name, globals, locals, fromlist, level)
    159 
    160             is_root_import = thread_local._nest_level == 1
 
ModuleNotFoundError: No module named 'mlflow'

What is the cause of this and how can I fix it? Unfortunately, Coursera is not helpful with this particular course.

Thank you, I am new to Databricks.