Machine Learning
Dive into the world of machine learning on the Databricks platform. Explore discussions on algorithms, model training, deployment, and more. Connect with ML enthusiasts and experts.

AutoGluon MLflow integration

cleversuresh
New Contributor III

I am working on a personalized price package recommendation and have implemented AutoGluon training code integrated with MLflow.

The code is written in a modular fashion so other team members can reuse it: they only need to pass the data, the target column, and an experiment name to create the experiment.

I keep running into problems when logging the model with MLflow; any help would be greatly appreciated.

This is my code:

class AutoGluonPyFuncWrapper(mlflow.pyfunc.PythonModel):
    """Wrapper for AutoGluon model to be logged as a PyFunc model in MLflow."""

    def __init__(self, model_path):
        self.model_path = model_path
        self.predictor = None  # Model will be loaded in predict method

    def load_context(self, context):
        """Loads the AutoGluon model when MLflow loads the PyFunc model."""
        self.predictor = TabularPredictor.load(self.model_path)

    def predict(self, context, model_input):
        """
        Predict probability scores for the given input.

        model_input: Pandas DataFrame
        Returns: Pandas DataFrame with probability scores
        """
        if isinstance(model_input, pd.DataFrame):
            predictions = self.predictor.predict_proba(model_input)
        else:
            predictions = self.predictor.predict_proba(pd.DataFrame(model_input))

        # Get the class label for positive class dynamically
        positive_class = predictions.columns[-1]  # Last column is usually the positive class
        return predictions[[positive_class]]  # Return only probability of positive class


class AutoGluonMLflowClassifier:
    def __init__(self, model_data: pd.DataFrame, target_col: str, experiment_name: str):
        """
        Initializes the classifier with the model DataFrame, target column, and MLflow experiment name.
        """
        self.model_data = model_data
        self.target_col = target_col
        self.experiment_name = experiment_name
        self.predictor = None
        self.train_predictions = None
        self.val_predictions = None
        self._initialize_mlflow()

    def _initialize_mlflow(self):
        """Sets up the MLflow experiment dynamically in Databricks."""

        # Define experiment path (store it under /Shared in the workspace)
        experiment_path = f"/Shared/automl_experiments/{self.experiment_name}"

        # Check if the experiment already exists
        experiment = mlflow.get_experiment_by_name(experiment_path)

        if experiment is None:
            # Create a new experiment if it does not exist
            experiment_id = mlflow.create_experiment(experiment_path)
            print(f"Created new MLflow experiment at: {experiment_path}")
        else:
            experiment_id = experiment.experiment_id
            print(f"Using existing MLflow experiment: {experiment_path}")

        # Set the experiment to use
        mlflow.set_experiment(experiment_path)

    def split_data(self):
        self.train_data, self.val_data = train_test_split(self.model_data, test_size=0.2, random_state=42)
        print(self.train_data.columns)

    def train_model(self, time_limit: int = 200):
        """Trains AutoGluon model and logs parameters, metrics, and artifacts in MLflow."""
        hyperparameters = {
            "GBM": {  # LightGBM
                "num_boost_round": 1000,   # More boosting rounds
                "learning_rate": 0.02,     # Lower learning rate for better generalization
                "num_leaves": 31,          # Leaf complexity
                "feature_fraction": 0.8,   # Feature bagging
                "bagging_fraction": 0.8,   # Sample bagging
                "bagging_freq": 5,         # Frequency of bagging
                "min_data_in_leaf": 20,    # Minimum samples per leaf
            },
            "XGB": {  # XGBoost
                "n_estimators": 1000,
                "learning_rate": 0.02,
                "max_depth": 6,            # Controls complexity
                "subsample": 0.8,          # Sample fraction per tree
                "colsample_bytree": 0.8,   # Feature bagging
                "gamma": 0.2,              # Regularization
                "lambda": 1,               # L2 regularization
            },
            "CAT": {  # CatBoost
                "iterations": 1000,
                "learning_rate": 0.02,
                "depth": 6,
                "l2_leaf_reg": 3,          # L2 regularization
                "border_count": 32,        # Number of bins for numeric features
            },
            "NN_TORCH": {  # Neural Network (PyTorch)
                "num_epochs": 100,         # Increase training epochs
                "learning_rate": 0.001,
                "dropout_prob": 0.1,       # Dropout regularization
                "weight_decay": 1e-5,      # L2 weight regularization
                "hidden_size": 256,        # Hidden layer size
            },
        }

        dbfs_model_path = "dbfs:/FileStore/automl/autogluon/"
        local_model_path = "/Shared/automl_experiments/autogluon_model/"

        with mlflow.start_run() as run:
            # Training AutoGluon model with AUC as the evaluation metric
            self.predictor = TabularPredictor(problem_type="binary",
                                              label=self.target_col,
                                              eval_metric="roc_auc",
                                              path=local_model_path) \
                .fit(self.train_data,
                     excluded_model_types=["KNN", "RF"],
                     hyperparameters=hyperparameters,
                     presets="best_quality",
                     num_bag_folds=3,
                     num_stack_levels=1,
                     time_limit=time_limit,
                     verbosity=1,   # Reduce logs
                     num_cpus=4,    # Limit CPU usage
                     num_gpus=0,
                     ag_args_fit={"num_cpus": 1, "num_gpus": 0})  # Ensure sequential training

            print(f"Model saved at: {local_model_path}")
            dbutils.fs.rm(dbfs_model_path, recurse=True)
            dbutils.fs.cp(f"file:{local_model_path}", dbfs_model_path, recurse=True)

            # Log dataset sizes
            mlflow.log_params({"training_data_size": self.train_data.shape[0],
                               "validation_data_size": self.val_data.shape[0]})

            # Making predictions on training and validation datasets
            self.train_predictions = self.predictor.predict_proba(self.train_data.drop(columns=[self.target_col])).iloc[:, -1]  # Probabilities for positive class
            self.val_predictions = self.predictor.predict_proba(self.val_data.drop(columns=[self.target_col])).iloc[:, -1]  # Probabilities for positive class
            print("Training predictions:", self.train_predictions)

            # Compute and log both training and validation metrics
            self.compute_metrics(self.train_data[self.target_col], self.train_predictions, "train")
            self.compute_metrics(self.val_data[self.target_col], self.val_predictions, "validation")

            print("Logging model to MLflow...")
            # Generate the model signature
            signature = infer_signature(model_input=self.train_data.drop(columns=[self.target_col]),
                                        model_output=self.train_predictions)

            model_wrapper = AutoGluonPyFuncWrapper(local_model_path)
            artifacts = {"predictor_path": dbfs_model_path}
            mlflow.pyfunc.log_model(artifact_path="model",
                                    python_model=model_wrapper,
                                    input_example=self.X_train[:2],
                                    signature=signature,
                                    artifacts=artifacts)

            self.run_id = run.info.run_id  # Store run ID
            print(f"Model logged successfully. Run ID: {self.run_id}")

            # Calculating classification report
            report = classification_report(self.val_data.drop(columns=[self.target_col]), self.val_predictions.round(), output_dict=True)
            mlflow.log_dict(report, "classification_report.json")

    # Define metric calculation function
    def compute_metrics(self, y_true, y_pred, prefix):
        """Computes and logs metrics with a specified prefix (train/validation)."""
        metrics = {
            f"{prefix}_auc": roc_auc_score(y_true, y_pred),
            f"{prefix}_average_precision": average_precision_score(y_true, y_pred),
            f"{prefix}_f1_score": f1_score(y_true, y_pred > 0.5),
            f"{prefix}_f2_score": fbeta_score(y_true, y_pred > 0.5, beta=2.0),
            f"{prefix}_brier_score": brier_score_loss(y_true, y_pred > 0.5),
            f"{prefix}_recall": recall_score(y_true, y_pred > 0.5),
            f"{prefix}_precision": precision_score(y_true, y_pred > 0.5),
        }
        for metric_name, value in metrics.items():
            mlflow.log_metric(metric_name, value)
        return metrics

    def evaluate_model(self):
        """Evaluate the model using AUC metric."""
        y_pred_proba = self.predictor.predict_proba(self.X_train).iloc[:, -1]
        auc_score = roc_auc_score(self.y_true, y_pred_proba)
        print(f"Model AUC: {auc_score:.4f}")
        return auc_score

    def run_pipeline(self):
        """Complete pipeline: data generation, training, evaluation, logging, and loading."""
        self.split_data()

        self.train_model()
        auc_score = self.evaluate_model()


from ucimlrepo import fetch_ucirepo

# fetch dataset
adult = fetch_ucirepo(id=2)

# data (as pandas dataframes)
X = adult.data.features
y = adult.data.targets

data = X.copy()
data['income'] = y['income']
data.head()
data['income'] = data['income'].replace({'<=50K.': '<=50K', '>50K.': '>50K'})
data['income'] = data['income'].replace({'<=50K': 0, '>50K': 1})
data['income'].value_counts()

# Example Usage:
classifier = AutoGluonMLflowClassifier(model_data=data,
                                       target_col="income",
                                       experiment_name="autogluon_sample_experiment")
classifier.run_pipeline()

1 ACCEPTED SOLUTION

stbjelcevic
Databricks Employee
Hi @cleversuresh 

Thanks for sharing the code and the context. Here are the core issues I see and how to fix them so MLflow logging works reliably on Databricks.

What’s breaking MLflow logging in your code

  • Your PyFunc wrapper loads the AutoGluon model from a local path rather than from the MLflow model’s packaged artifacts. In PythonModel.load_context, you must read any files from context.artifacts[...]. Otherwise, loading or serving the model will fail when that local path doesn’t exist in the target environment.

  • The input_example and signature inference are misaligned. You pass self.X_train[:2], but self.X_train is never defined; also input_example must match the schema you infer with infer_signature(model_input=..., model_output=...). Use a small slice of train_features (DataFrame with target dropped) for both signature and example.

  • classification_report arguments are incorrect. It expects y_true and y_pred (discrete labels), but you pass X as y_true and rounded probabilities as y_pred. Pass self.val_data[self.target_col] and (self.val_predictions > 0.5).astype(int) (or a tuned threshold) instead.

  • brier_score_loss expects probabilities, not thresholded predictions. Use the raw positive-class probabilities y_pred_proba (shape (n_samples,)) for Brier, not (y_pred > 0.5). If you need 0–1 range, set scale_by_half=True (binary default is usually auto).

  • evaluate_model uses undefined attributes (self.X_train, self.y_true). Use your stored train/validation splits and compute AUC with roc_auc_score(y_true, y_score) where y_score are positive-class probabilities.

  • The AutoGluon path pointing to /Shared/... is a workspace path, not a filesystem location. Use a real local/temp directory (for example via tempfile.mkdtemp()), then package it into MLflow model artifacts with artifacts={"ag_predictor": <local_dir>} and load with context.artifacts[...] in your PyFunc.

  • Make sure to set the MLflow experiment to a workspace path (like /Shared/...), which is supported on Databricks; if you want artifacts stored in UC Volumes, create the experiment with a UC volume artifact location.

  • Finally, ensure runtime dependencies (AutoGluon + its model backends, e.g., LightGBM, XGBoost, CatBoost) are present when loading/serving the model. Use conda_env or extra_pip_requirements in mlflow.pyfunc.log_model so MLflow reproduces the environment cleanly.

Here are some code patches:

1) Fix the PyFunc wrapper to read from packaged artifacts:

import mlflow
import pandas as pd
from mlflow.pyfunc import PythonModel
from autogluon.tabular import TabularPredictor

class AutoGluonPyFuncWrapper(PythonModel):
    """Wrapper for AutoGluon model to be logged as a PyFunc model in MLflow."""

    def __init__(self):
        self.predictor = None

    def load_context(self, context):
        # Load the predictor directory that was logged as an artifact
        predictor_dir = context.artifacts["ag_predictor"]
        self.predictor = TabularPredictor.load(predictor_dir)

    def predict(self, context, model_input):
        # Accept dict/list; convert to DataFrame
        if not isinstance(model_input, pd.DataFrame):
            model_input = pd.DataFrame(model_input)

        # Probability of the positive class
        proba_df = self.predictor.predict_proba(model_input)

        # Choose positive label robustly (prefer 1 if present)
        class_labels = list(proba_df.columns)
        pos_label = 1 if 1 in class_labels else class_labels[-1]
        return proba_df[pos_label]  # Pandas Series of positive-class probabilities

2) Log AutoGluon predictor directory as an MLflow artifact and align the signature

import tempfile
import mlflow
from mlflow.models.signature import infer_signature

# Choose a real local directory for AutoGluon training output
local_model_dir = tempfile.mkdtemp(prefix="ag_predictor_")

with mlflow.start_run() as run:
    # Train AutoGluon
    self.predictor = TabularPredictor(
        problem_type="binary",
        label=self.target_col,
        eval_metric="roc_auc",
        path=local_model_dir
    ).fit(
        self.train_data,
        excluded_model_types=["KNN", "RF"],
        hyperparameters=hyperparameters,
        presets="best_quality",
        num_bag_folds=3,
        num_stack_levels=1,
        time_limit=time_limit,
        verbosity=1,
        num_cpus=4,
        num_gpus=0,
        ag_args_fit={"num_cpus": 1, "num_gpus": 0}
    )

    # Compute train/val probabilities for metrics
    train_X = self.train_data.drop(columns=[self.target_col])
    val_X = self.val_data.drop(columns=[self.target_col])
    self.train_predictions = self.predictor.predict_proba(train_X).iloc[:, -1]
    self.val_predictions = self.predictor.predict_proba(val_X).iloc[:, -1]

    # Metrics (see patch 3 below)
    self.compute_metrics(self.train_data[self.target_col], self.train_predictions, "train")
    self.compute_metrics(self.val_data[self.target_col], self.val_predictions, "validation")

    # Signature and input_example must match the wrapper’s input/output
    input_example = train_X.head(2)
    signature = infer_signature(model_input=input_example, model_output=self.train_predictions.head(2))

    # Log PyFunc model and the trained predictor directory as artifact
    mlflow.pyfunc.log_model(
        artifact_path="model",
        python_model=AutoGluonPyFuncWrapper(),
        artifacts={"ag_predictor": local_model_dir},
        signature=signature,
        input_example=input_example,
        # Strongly recommended: pin pip requirements to include AutoGluon & backends
        extra_pip_requirements=[
            "mlflow>=2.8.0",  # adjust to your workspace runtime
            "autogluon.tabular>=1.1.0",  # pin your version
            "xgboost>=1.7.0",
            "lightgbm>=3.3.5",
            "catboost>=1.2"
        ],
    )

    self.run_id = run.info.run_id
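
As a quick sanity check after logging (a minimal sketch that reuses the run handle and the val_X frame from the patch above; adjust the names to your notebook), you can reload the PyFunc model and confirm that load_context resolves the packaged predictor outside the training code:

import mlflow

# Reload the model that was just logged in this run
model_uri = f"runs:/{run.info.run_id}/model"
loaded_model = mlflow.pyfunc.load_model(model_uri)

# Score a few validation rows; this exercises load_context(), so it verifies
# that the "ag_predictor" artifact was packaged with the model
sample_scores = loaded_model.predict(val_X.head(5))
print(sample_scores)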

3) Correct your metric logging

from sklearn.metrics import (
    roc_auc_score,
    average_precision_score,
    f1_score,
    fbeta_score,
    brier_score_loss,
    recall_score,
    precision_score,
    classification_report
)

def compute_metrics(self, y_true, y_pred_proba, prefix):
    # y_pred_proba: probabilities of positive class
    y_pred_bin = (y_pred_proba > 0.5).astype(int)

    metrics = {
        f"{prefix}_auc": roc_auc_score(y_true, y_pred_proba),
        f"{prefix}_average_precision": average_precision_score(y_true, y_pred_proba),
        f"{prefix}_f1_score": f1_score(y_true, y_pred_bin),
        f"{prefix}_f2_score": fbeta_score(y_true, y_pred_bin, beta=2.0),
        f"{prefix}_brier_score": brier_score_loss(y_true, y_pred_proba),
        f"{prefix}_recall": recall_score(y_true, y_pred_bin),
        f"{prefix}_precision": precision_score(y_true, y_pred_bin),
    }
    for k, v in metrics.items():
        mlflow.log_metric(k, float(v))
    return metrics

def log_classification_report(self):
    # Use validation set labels and thresholded predictions
    y_true = self.val_data[self.target_col]
    y_pred_bin = (self.val_predictions > 0.5).astype(int)
    report = classification_report(y_true, y_pred_bin, output_dict=True)
    mlflow.log_dict(report, "classification_report.json")

4) Fix evaluate_model to use your stored splits

def evaluate_model(self):
    # Use the validation set probabilities already computed
    auc_score = roc_auc_score(self.val_data[self.target_col], self.val_predictions)
    print(f"Model AUC (validation): {auc_score:.4f}")
    return auc_score

A few Databricks-specific practices to keep this robust

  • Set the workspace experiment path once (recommended): mlflow.set_experiment(f"/Shared/automl_experiments/{self.experiment_name}"). If you want to store artifacts in UC Volumes, create the experiment with an artifact location at a UC Volume path first, then set it active by path (see the sketch after this list).

  • Package all runtime deps with the model (pip/conda), especially AutoGluon and its tree learners. You can use extra_pip_requirements (shown above) or supply a conda_env dict if you prefer hard pinning Python and Conda channels (also shown in the sketch after this list).

  • Always load files via context.artifacts[...] in load_context. MLflow will download artifacts next to the model and pass you local paths at runtime; don’t assume workspace or DBFS paths exist when the model is rehydrated.

  • Align input_example with your signature and wrapper input type (DataFrame rows of features). Signature/input_example improves handoff, validation, and serving.
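
Here is a minimal sketch of the first two points above; the experiment name, the UC volume path (dbfs:/Volumes/main/default/ml_artifacts/autogluon), and the pinned versions are placeholders to replace with your own:

import mlflow

# Option A: create the experiment once with a UC Volumes artifact location,
# then activate it by workspace path as usual (placeholder names).
experiment_path = "/Shared/automl_experiments/autogluon_sample_experiment"
if mlflow.get_experiment_by_name(experiment_path) is None:
    mlflow.create_experiment(
        experiment_path,
        artifact_location="dbfs:/Volumes/main/default/ml_artifacts/autogluon",  # placeholder UC volume path
    )
mlflow.set_experiment(experiment_path)

# Option B: hard-pin the environment with a conda_env dict and pass it as
# mlflow.pyfunc.log_model(..., conda_env=conda_env) instead of extra_pip_requirements.
conda_env = {
    "name": "autogluon-serving",
    "channels": ["conda-forge"],
    "dependencies": [
        "python=3.10",  # match your cluster's Python version
        "pip",
        {"pip": [
            "mlflow>=2.8.0",
            "autogluon.tabular>=1.1.0",
            "xgboost>=1.7.0",
            "lightgbm>=3.3.5",
            "catboost>=1.2",
        ]},
    ],
}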


