<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: AutoGluon MLflow integration in Machine Learning</title>
    <link>https://community.databricks.com/t5/machine-learning/autogluon-mlflow-integration/m-p/137062#M4396</link>
    <description>&lt;P class="qt3gz91 paragraph"&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/114928"&gt;@cleversuresh&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P class="qt3gz91 paragraph"&gt;Thanks for sharing the code and the context. Here are the core issues I see and how to fix them so MLflow logging works reliably on Databricks.&lt;/P&gt;
&lt;H3 class="_7uu25p0 qt3gz9c _7pq7t612 heading3 _7uu25p1"&gt;What’s breaking MLflow logging in your code&lt;/H3&gt;
&lt;UL class="qt3gz97 qt3gz92"&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;Your &lt;STRONG&gt;PyFunc wrapper loads the AutoGluon model from a local path&lt;/STRONG&gt; rather than from the MLflow model’s packaged artifacts. In &lt;CODE class="qt3gz9f"&gt;PythonModel.load_context&lt;/CODE&gt;, you must read any files from &lt;CODE class="qt3gz9f"&gt;context.artifacts[...]&lt;/CODE&gt;. Otherwise, loading or serving the model will fail when that local path doesn’t exist in the target environment.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;The &lt;STRONG&gt;&lt;CODE class="qt3gz9f"&gt;input_example&lt;/CODE&gt; and signature inference are misaligned&lt;/STRONG&gt;. You pass &lt;CODE class="qt3gz9f"&gt;self.X_train[:2]&lt;/CODE&gt;, but &lt;CODE class="qt3gz9f"&gt;self.X_train&lt;/CODE&gt; is never defined; also &lt;CODE class="qt3gz9f"&gt;input_example&lt;/CODE&gt; must match the schema you infer with &lt;CODE class="qt3gz9f"&gt;infer_signature(model_input=..., model_output=...)&lt;/CODE&gt;. Use a small slice of &lt;CODE class="qt3gz9f"&gt;train_features&lt;/CODE&gt; (DataFrame with target dropped) for both signature and example.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;&lt;STRONG&gt;&lt;CODE class="qt3gz9f"&gt;classification_report&lt;/CODE&gt; arguments are incorrect&lt;/STRONG&gt;. It expects &lt;CODE class="qt3gz9f"&gt;y_true&lt;/CODE&gt; and &lt;CODE class="qt3gz9f"&gt;y_pred&lt;/CODE&gt; (discrete labels), but you pass &lt;CODE class="qt3gz9f"&gt;X&lt;/CODE&gt; as &lt;CODE class="qt3gz9f"&gt;y_true&lt;/CODE&gt; and rounded probabilities as &lt;CODE class="qt3gz9f"&gt;y_pred&lt;/CODE&gt;. Pass &lt;CODE class="qt3gz9f"&gt;self.val_data[self.target_col]&lt;/CODE&gt; and &lt;CODE class="qt3gz9f"&gt;(self.val_predictions &amp;gt; 0.5).astype(int)&lt;/CODE&gt; (or a tuned threshold) instead.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;&lt;STRONG&gt;&lt;CODE class="qt3gz9f"&gt;brier_score_loss&lt;/CODE&gt; expects probabilities, not thresholded predictions&lt;/STRONG&gt;. Use the raw positive-class probabilities &lt;CODE class="qt3gz9f"&gt;y_pred_proba&lt;/CODE&gt; (shape &lt;CODE class="qt3gz9f"&gt;(n_samples,)&lt;/CODE&gt;) for Brier, not &lt;CODE class="qt3gz9f"&gt;(y_pred &amp;gt; 0.5)&lt;/CODE&gt;. If you need 0–1 range, set &lt;CODE class="qt3gz9f"&gt;scale_by_half=True&lt;/CODE&gt; (binary default is usually auto).&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;&lt;STRONG&gt;&lt;CODE class="qt3gz9f"&gt;evaluate_model&lt;/CODE&gt; uses undefined attributes (&lt;CODE class="qt3gz9f"&gt;self.X_train&lt;/CODE&gt;, &lt;CODE class="qt3gz9f"&gt;self.y_true&lt;/CODE&gt;)&lt;/STRONG&gt;. Use your stored train/validation splits and compute AUC with &lt;CODE class="qt3gz9f"&gt;roc_auc_score(y_true, y_score)&lt;/CODE&gt; where &lt;CODE class="qt3gz9f"&gt;y_score&lt;/CODE&gt; are positive-class probabilities.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;The &lt;STRONG&gt;AutoGluon &lt;CODE class="qt3gz9f"&gt;path&lt;/CODE&gt; pointing to &lt;CODE class="qt3gz9f"&gt;/Shared/...&lt;/CODE&gt;&lt;/STRONG&gt; is a workspace path, not a filesystem location. Use a real local/temp directory (for example via &lt;CODE class="qt3gz9f"&gt;tempfile.mkdtemp()&lt;/CODE&gt;), then package it into MLflow model artifacts with &lt;CODE class="qt3gz9f"&gt;artifacts={"ag_predictor": &amp;lt;local_dir&amp;gt;}&lt;/CODE&gt; and load with &lt;CODE class="qt3gz9f"&gt;context.artifacts[...]&lt;/CODE&gt; in your PyFunc.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;Make sure to &lt;STRONG&gt;set the MLflow experiment to a workspace path&lt;/STRONG&gt; (like &lt;CODE class="qt3gz9f"&gt;/Shared/...&lt;/CODE&gt;), which is supported on Databricks; if you want artifacts stored in UC Volumes, create the experiment with a UC volume artifact location.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;Finally, ensure &lt;STRONG&gt;runtime dependencies&lt;/STRONG&gt; (AutoGluon + its model backends, e.g., LightGBM, XGBoost, CatBoost) are present when loading/serving the model. Use &lt;CODE class="qt3gz9f"&gt;conda_env&lt;/CODE&gt; or &lt;CODE class="qt3gz9f"&gt;extra_pip_requirements&lt;/CODE&gt; in &lt;CODE class="qt3gz9f"&gt;mlflow.pyfunc.log_model&lt;/CODE&gt; so MLflow reproduces the environment cleanly.&lt;/P&gt;
&lt;/LI&gt;
&lt;/UL&gt;
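&lt;P class="qt3gz91 paragraph"&gt;For the "tuned threshold" mentioned above, here is a minimal sketch (the helper name and the F1 objective are my choices, not from your code): instead of a fixed 0.5, pick the cutoff that maximizes F1 on the validation probabilities.&lt;/P&gt;
&lt;LI-CODE lang="python"&gt;import numpy as np
from sklearn.metrics import precision_recall_curve

def tune_threshold(y_true, y_pred_proba):
    """Return the probability cutoff that maximizes F1 on held-out data."""
    precision, recall, thresholds = precision_recall_curve(y_true, y_pred_proba)
    # precision/recall have one more entry than thresholds; drop the final point
    denom = np.clip(precision[:-1] + recall[:-1], 1e-12, None)
    f1 = 2 * precision[:-1] * recall[:-1] / denom
    return float(thresholds[np.argmax(f1)])&lt;/LI-CODE&gt;
&lt;P class="qt3gz91 paragraph"&gt;You would then binarize with this value in place of 0.5 in &lt;CODE class="qt3gz9f"&gt;compute_metrics&lt;/CODE&gt; and the classification report.&lt;/P&gt;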
&lt;H3 class="_7uu25p0 qt3gz9c _7pq7t612 heading3 _7uu25p1"&gt;Code patches&lt;/H3&gt;
&lt;H4&gt;1) Fix the PyFunc wrapper to read from packaged artifacts&lt;/H4&gt;
&lt;LI-CODE lang="python"&gt;import mlflow
import pandas as pd
from mlflow.pyfunc import PythonModel
from autogluon.tabular import TabularPredictor

class AutoGluonPyFuncWrapper(PythonModel):
    """Wrapper for AutoGluon model to be logged as a PyFunc model in MLflow."""

    def __init__(self):
        self.predictor = None

    def load_context(self, context):
        # Load the predictor directory that was logged as an artifact
        predictor_dir = context.artifacts["ag_predictor"]
        self.predictor = TabularPredictor.load(predictor_dir)

    def predict(self, context, model_input):
        # Accept dict/list; convert to DataFrame
        if not isinstance(model_input, pd.DataFrame):
            model_input = pd.DataFrame(model_input)

        # Probability of the positive class
        proba_df = self.predictor.predict_proba(model_input)

        # Choose positive label robustly (prefer 1 if present)
        class_labels = list(proba_df.columns)
        pos_label = 1 if 1 in class_labels else class_labels[-1]
        return proba_df[pos_label]  # Pandas Series of positive-class probabilities&lt;/LI-CODE&gt;
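&lt;P class="qt3gz91 paragraph"&gt;A tiny pandas-only check of the positive-label fallback in the wrapper (the example frames are made up): the integer label 1 wins when present; otherwise the last probability column is used, which also covers string labels.&lt;/P&gt;
&lt;LI-CODE lang="python"&gt;import pandas as pd

# Mimic TabularPredictor.predict_proba output: one column per class label
proba_int = pd.DataFrame({0: [0.8, 0.3], 1: [0.2, 0.7]})
labels_int = list(proba_int.columns)
pos_int = 1 if 1 in labels_int else labels_int[-1]   # selects 1

# String labels fall back to the last column
proba_str = pd.DataFrame({"no": [0.8], "yes": [0.2]})
labels_str = list(proba_str.columns)
pos_str = 1 if 1 in labels_str else labels_str[-1]   # selects "yes"&lt;/LI-CODE&gt;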
&lt;H4&gt;2) Log AutoGluon predictor directory as an MLflow artifact and align the signature&lt;/H4&gt;
&lt;LI-CODE lang="python"&gt;import tempfile
import mlflow
from mlflow.models.signature import infer_signature

# Choose a real local directory for AutoGluon training output
local_model_dir = tempfile.mkdtemp(prefix="ag_predictor_")

with mlflow.start_run() as run:
    # Train AutoGluon
    self.predictor = TabularPredictor(
        problem_type="binary",
        label=self.target_col,
        eval_metric="roc_auc",
        path=local_model_dir
    ).fit(
        self.train_data,
        excluded_model_types=["KNN", "RF"],
        hyperparameters=hyperparameters,
        presets="best_quality",
        num_bag_folds=3,
        num_stack_levels=1,
        time_limit=time_limit,
        verbosity=1,
        num_cpus=4,
        num_gpus=0,
        ag_args_fit={"num_cpus": 1, "num_gpus": 0}
    )

    # Compute train/val probabilities for metrics
    train_X = self.train_data.drop(columns=[self.target_col])
    val_X = self.val_data.drop(columns=[self.target_col])
    self.train_predictions = self.predictor.predict_proba(train_X).iloc[:, -1]
    self.val_predictions = self.predictor.predict_proba(val_X).iloc[:, -1]

    # Metrics (see patch 3 below)
    self.compute_metrics(self.train_data[self.target_col], self.train_predictions, "train")
    self.compute_metrics(self.val_data[self.target_col], self.val_predictions, "validation")

    # Signature and input_example must match the wrapper’s input/output
    input_example = train_X.head(2)
    signature = infer_signature(model_input=input_example, model_output=self.train_predictions.head(2))

    # Log PyFunc model and the trained predictor directory as artifact
    mlflow.pyfunc.log_model(
        artifact_path="model",
        python_model=AutoGluonPyFuncWrapper(),
        artifacts={"ag_predictor": local_model_dir},
        signature=signature,
        input_example=input_example,
        # Strongly recommended: pin pip requirements to include AutoGluon &amp;amp; backends
        extra_pip_requirements=[
            "mlflow&amp;gt;=2.8.0",  # adjust to your workspace runtime
            "autogluon.tabular&amp;gt;=1.1.0",  # pin your version
            "xgboost&amp;gt;=1.7.0",
            "lightgbm&amp;gt;=3.3.5",
            "catboost&amp;gt;=1.2"
        ],
    )

    self.run_id = run.info.run_id&lt;/LI-CODE&gt;
&lt;H4&gt;3) Correct your metric logging&lt;/H4&gt;
&lt;LI-CODE lang="python"&gt;from sklearn.metrics import (
    roc_auc_score,
    average_precision_score,
    f1_score,
    fbeta_score,
    brier_score_loss,
    recall_score,
    precision_score,
    classification_report
)

def compute_metrics(self, y_true, y_pred_proba, prefix):
    # y_pred_proba: probabilities of positive class
    y_pred_bin = (y_pred_proba &amp;gt; 0.5).astype(int)

    metrics = {
        f"{prefix}_auc": roc_auc_score(y_true, y_pred_proba),
        f"{prefix}_average_precision": average_precision_score(y_true, y_pred_proba),
        f"{prefix}_f1_score": f1_score(y_true, y_pred_bin),
        f"{prefix}_f2_score": fbeta_score(y_true, y_pred_bin, beta=2.0),
        f"{prefix}_brier_score": brier_score_loss(y_true, y_pred_proba),
        f"{prefix}_recall": recall_score(y_true, y_pred_bin),
        f"{prefix}_precision": precision_score(y_true, y_pred_bin),
    }
    for k, v in metrics.items():
        mlflow.log_metric(k, float(v))
    return metrics

def log_classification_report(self):
    # Use validation set labels and thresholded predictions
    y_true = self.val_data[self.target_col]
    y_pred_bin = (self.val_predictions &amp;gt; 0.5).astype(int)
    report = classification_report(y_true, y_pred_bin, output_dict=True)
    mlflow.log_dict(report, "classification_report.json")&lt;/LI-CODE&gt;
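&lt;P class="qt3gz91 paragraph"&gt;To make the Brier point concrete, a small synthetic comparison (numbers are illustrative): scoring thresholded labels can report a perfect 0.0 even though the underlying probabilities carry real error.&lt;/P&gt;
&lt;LI-CODE lang="python"&gt;import numpy as np
from sklearn.metrics import brier_score_loss

y_true = np.array([0, 0, 1, 1])
y_proba = np.array([0.1, 0.4, 0.6, 0.9])

# Correct: score the probabilities themselves
brier_proba = brier_score_loss(y_true, y_proba)          # 0.085

# Incorrect: rounding to hard labels first discards calibration information
brier_hard = brier_score_loss(y_true, y_proba.round().astype(int))  # 0.0&lt;/LI-CODE&gt;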
&lt;H4&gt;4) Fix evaluate_model to use your stored splits&lt;/H4&gt;
&lt;LI-CODE lang="python"&gt;def evaluate_model(self):
    # Use the validation set probabilities already computed
    auc_score = roc_auc_score(self.val_data[self.target_col], self.val_predictions)
    print(f"Model AUC (validation): {auc_score:.4f}")
    return auc_score&lt;/LI-CODE&gt;
&lt;H3 class="_7uu25p0 qt3gz9c _7pq7t612 heading3 _7uu25p1"&gt;A couple of Databricks-specific practices to keep this robust&lt;/H3&gt;
&lt;UL class="qt3gz97 qt3gz92"&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;&lt;STRONG&gt;Set the workspace experiment path once&lt;/STRONG&gt; (recommended):&lt;BR /&gt;&lt;CODE class="qt3gz9f"&gt;mlflow.set_experiment(f"/Shared/automl_experiments/{self.experiment_name}")&lt;/CODE&gt;. If you want to store artifacts in UC Volumes, create the experiment with an artifact location at a UC Volume path first, then set it active by path.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;&lt;STRONG&gt;Package all runtime deps&lt;/STRONG&gt; with the model (pip/conda), especially AutoGluon and its tree learners. You can use &lt;CODE class="qt3gz9f"&gt;extra_pip_requirements&lt;/CODE&gt; (shown above) or supply a &lt;CODE class="qt3gz9f"&gt;conda_env&lt;/CODE&gt; dict if you prefer hard pinning Python and Conda channels.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;&lt;STRONG&gt;Always load files via &lt;CODE class="qt3gz9f"&gt;context.artifacts[...]&lt;/CODE&gt; in &lt;CODE class="qt3gz9f"&gt;load_context&lt;/CODE&gt;&lt;/STRONG&gt;. MLflow will download artifacts next to the model and pass you local paths at runtime; don’t assume workspace or DBFS paths exist when the model is rehydrated.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;&lt;STRONG&gt;Align &lt;CODE class="qt3gz9f"&gt;input_example&lt;/CODE&gt; with your signature&lt;/STRONG&gt; and wrapper input type (DataFrame rows of features). Signature/&lt;CODE class="qt3gz9f"&gt;input_example&lt;/CODE&gt; improves handoff, validation, and serving.&lt;/P&gt;
&lt;/LI&gt;
&lt;/UL&gt;</description>
    <pubDate>Fri, 31 Oct 2025 15:57:11 GMT</pubDate>
    <dc:creator>stbjelcevic</dc:creator>
    <dc:date>2025-10-31T15:57:11Z</dc:date>
    <item>
      <title>AutoGluon MLflow integration</title>
      <link>https://community.databricks.com/t5/machine-learning/autogluon-mlflow-integration/m-p/111423#M3977</link>
      <description>&lt;P&gt;I am working on a personalized price package recommendation and implemented an AutoGluon code integrating it with MLflow.&lt;/P&gt;&lt;P&gt;The code has been created in a modular fashion to be used by other team members. They just need to pass the data, target column and experiment name to create the experiment.&lt;/P&gt;&lt;P&gt;I always face some problems when logging the model with MLflow, any help would be greatly appreciated.&lt;/P&gt;&lt;P&gt;This is my code:&lt;/P&gt;&lt;P&gt;class AutoGluonPyFuncWrapper(mlflow.pyfunc.PythonModel):&lt;BR /&gt;"""Wrapper for AutoGluon model to be logged as a PyFunc model in MLflow."""&lt;BR /&gt;&lt;BR /&gt;def __init__(self, model_path):&lt;BR /&gt;self.model_path = model_path&lt;BR /&gt;self.predictor = None # Model will be loaded in predict method&lt;/P&gt;&lt;P&gt;def load_context(self, context):&lt;BR /&gt;"""Loads the AutoGluon model when MLflow loads the PyFunc model."""&lt;BR /&gt;self.predictor = TabularPredictor.load(self.model_path)&lt;/P&gt;&lt;P&gt;def predict(self, context, model_input):&lt;BR /&gt;"""&lt;BR /&gt;Predict probability scores for the given input.&lt;BR /&gt;&lt;BR /&gt;model_input: Pandas DataFrame&lt;BR /&gt;Returns: Pandas DataFrame with probability scores&lt;BR /&gt;"""&lt;BR /&gt;if isinstance(model_input, pd.DataFrame):&lt;BR /&gt;predictions = self.predictor.predict_proba(model_input)&lt;BR /&gt;else:&lt;BR /&gt;predictions = self.predictor.predict_proba(pd.DataFrame(model_input))&lt;/P&gt;&lt;P&gt;# Get the class label for positive class dynamically&lt;BR /&gt;positive_class = predictions.columns[-1] # Last column is usually the positive class&lt;BR /&gt;return predictions[[positive_class]] # Return only probability of positive class&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;class AutoGluonMLflowClassifier:&lt;BR /&gt;def __init__(self, model_data: pd.DataFrame, target_col: str, experiment_name: str):&lt;BR /&gt;"""&lt;BR /&gt;Initializes the classifier with Databricks table 
name, target column, and MLflow experiment name.&lt;BR /&gt;"""&lt;BR /&gt;self.model_data = model_data&lt;BR /&gt;self.target_col = target_col&lt;BR /&gt;self.experiment_name = experiment_name&lt;BR /&gt;self.predictor = None&lt;BR /&gt;self.train_predictions = None&lt;BR /&gt;self.val_predictions = None&lt;BR /&gt;self._initialize_mlflow()&lt;/P&gt;&lt;P&gt;def _initialize_mlflow(self):&lt;BR /&gt;"""Sets up MLflow experiment dynamically in Databricks."""&lt;BR /&gt;&lt;BR /&gt;# Define experiment path (store it in the user's workspace)&lt;BR /&gt;experiment_path = f"/Shared/automl_experiments/{self.experiment_name}"&lt;/P&gt;&lt;P&gt;# Check if the experiment already exists&lt;BR /&gt;experiment = mlflow.get_experiment_by_name(experiment_path)&lt;/P&gt;&lt;P&gt;if experiment is None:&lt;BR /&gt;# Create a new experiment if it does not exist&lt;BR /&gt;experiment_id = mlflow.create_experiment(experiment_path)&lt;BR /&gt;print(f"Created new MLflow experiment at: {experiment_path}")&lt;BR /&gt;else:&lt;BR /&gt;experiment_id = experiment.experiment_id&lt;BR /&gt;print(f"Using existing MLflow experiment: {experiment_path}")&lt;/P&gt;&lt;P&gt;# Set the experiment to use&lt;BR /&gt;mlflow.set_experiment(experiment_path)&lt;BR /&gt;&lt;BR /&gt;def split_data(self):&lt;BR /&gt;&lt;BR /&gt;self.train_data, self.val_data = train_test_split(self.model_data, test_size=0.2, random_state=42)&lt;BR /&gt;print(self.train_data.columns)&lt;BR /&gt;def train_model(self, time_limit: int = 200):&lt;BR /&gt;"""Trains AutoGluon model and logs parameters, metrics, and artifacts in MLflow."""&lt;BR /&gt;hyperparameters = {&lt;BR /&gt;"GBM": { # LightGBM&lt;BR /&gt;"num_boost_round": 1000, # More boosting rounds&lt;BR /&gt;"learning_rate": 0.02, # Lower learning rate for better generalization&lt;BR /&gt;"num_leaves": 31, # Leaf complexity&lt;BR /&gt;"feature_fraction": 0.8, # Feature bagging&lt;BR /&gt;"bagging_fraction": 0.8, # Sample bagging&lt;BR /&gt;"bagging_freq": 5, # Frequency of 
bagging&lt;BR /&gt;"min_data_in_leaf": 20, # Minimum samples per leaf&lt;BR /&gt;},&lt;BR /&gt;"XGB": { # XGBoost&lt;BR /&gt;"n_estimators": 1000,&lt;BR /&gt;"learning_rate": 0.02,&lt;BR /&gt;"max_depth": 6, # Controls complexity&lt;BR /&gt;"subsample": 0.8, # Sample fraction per tree&lt;BR /&gt;"colsample_bytree": 0.8, # Feature bagging&lt;BR /&gt;"gamma": 0.2, # Regularization&lt;BR /&gt;"lambda": 1, # L2 regularization&lt;BR /&gt;},&lt;BR /&gt;"CAT": { # CatBoost&lt;BR /&gt;"iterations": 1000,&lt;BR /&gt;"learning_rate": 0.02,&lt;BR /&gt;"depth": 6,&lt;BR /&gt;"l2_leaf_reg": 3, # L2 regularization&lt;BR /&gt;"border_count": 32, # Number of bins for numeric features&lt;BR /&gt;},&lt;BR /&gt;"NN_TORCH": { # Neural Network (PyTorch)&lt;BR /&gt;"num_epochs": 100, # Increase training epochs&lt;BR /&gt;"learning_rate": 0.001,&lt;BR /&gt;"dropout_prob": 0.1, # Dropout regularization&lt;BR /&gt;"weight_decay": 1e-5, # L2 weight regularization&lt;BR /&gt;"hidden_size": 256, # Hidden layer size&lt;BR /&gt;}&lt;BR /&gt;}&lt;BR /&gt;&lt;BR /&gt;dbfs_model_path = "dbfs:/FileStore/automl/autogluon/"&lt;BR /&gt;local_model_path = "/Shared/automl_experiments/autogluon_model/"&lt;/P&gt;&lt;P&gt;with mlflow.start_run() as run:&lt;BR /&gt;# Training AutoGluon model with AUC as the evaluation metric&lt;BR /&gt;self.predictor = TabularPredictor(problem_type = "binary",&lt;BR /&gt;label = self.target_col,&lt;BR /&gt;eval_metric = "roc_auc",&lt;BR /&gt;path = local_model_path) \&lt;BR /&gt;.fit(self.train_data,&lt;BR /&gt;excluded_model_types = ["KNN", "RF"],&lt;BR /&gt;hyperparameters = hyperparameters,&lt;BR /&gt;presets = "best_quality",&lt;BR /&gt;num_bag_folds = 3,&lt;BR /&gt;num_stack_levels = 1,&lt;BR /&gt;time_limit = time_limit,&lt;BR /&gt;verbosity = 1, # Reduce logs&lt;BR /&gt;num_cpus = 4, # Limit CPU usage&lt;BR /&gt;num_gpus = 0,&lt;BR /&gt;ag_args_fit = {"num_cpus": 1, "num_gpus": 0} # Ensure sequential training&lt;BR /&gt;)&lt;BR /&gt;&lt;BR /&gt;print(f"Model saved 
at: {local_model_path}")&lt;BR /&gt;dbutils.fs.rm(dbfs_model_path, recurse=True)&lt;BR /&gt;dbutils.fs.cp(f"file:{local_model_path}", dbfs_model_path, recurse=True)&lt;BR /&gt;&lt;BR /&gt;# log dataset size&lt;BR /&gt;mlflow.log_params({"trainning_data_size": self.train_data.shape[0],&lt;BR /&gt;"validation_data_size": self.val_data.shape[0]})&lt;/P&gt;&lt;P&gt;# Making predictions on training and validation datasets&lt;BR /&gt;self.train_predictions = self.predictor.predict_proba(self.train_data.drop(columns = [self.target_col])).iloc[:, -1] # Get probabilities for positive class&lt;BR /&gt;self.val_predictions = self.predictor.predict_proba(self.val_data.drop(columns = [self.target_col])).iloc[:, -1] # Get probabilities for positive class&lt;BR /&gt;print("Training predictions:", self.train_predictions)&lt;/P&gt;&lt;P&gt;# Compute and log both training and validation metrics&lt;BR /&gt;self.compute_metrics(self.train_data[self.target_col], self.train_predictions, "train")&lt;BR /&gt;self.compute_metrics(self.val_data[self.target_col], self.val_predictions, "validation")&lt;BR /&gt;&lt;BR /&gt;print("Logging model to MLflow...")&lt;BR /&gt;# generate the model signature&lt;BR /&gt;signature = infer_signature(model_input = self.train_data.drop(columns = [self.target_col]),&lt;BR /&gt;model_output = self.train_predictions)&lt;/P&gt;&lt;P&gt;model_wrapper = AutoGluonPyFuncWrapper(local_model_path)&lt;BR /&gt;artifacts = {"predictor_path": dbfs_model_path}&lt;BR /&gt;mlflow.pyfunc.log_model(artifact_path = "model",&lt;BR /&gt;python_model = model_wrapper,&lt;BR /&gt;input_example = self.X_train[:2],&lt;BR /&gt;signature = signature,&lt;BR /&gt;artifacts = artifacts)&lt;BR /&gt;&lt;BR /&gt;self.run_id = run.info.run_id # Store run ID&lt;BR /&gt;print(f"Model logged successfully. 
Run ID: {self.run_id}")&lt;BR /&gt;&lt;BR /&gt;# Calculating classification report&lt;BR /&gt;report = classification_report(self.val_data.drop(columns = [self.target_col]), self.val_predictions.round(), output_dict=True)&lt;BR /&gt;mlflow.log_dict(report, "classification_report.json")&lt;BR /&gt;&lt;BR /&gt;# Define metric calculation function&lt;BR /&gt;def compute_metrics(self, y_true, y_pred, prefix):&lt;BR /&gt;"""Computes and logs metrics with a specified prefix (train/validation)."""&lt;BR /&gt;metrics = {&lt;BR /&gt;f"{prefix}_auc": roc_auc_score(y_true, y_pred),&lt;BR /&gt;f"{prefix}_average_precision": average_precision_score(y_true, y_pred),&lt;BR /&gt;f"{prefix}_f1_score": f1_score(y_true, y_pred &amp;gt; 0.5),&lt;BR /&gt;f"{prefix}_f2_score": fbeta_score(y_true, y_pred &amp;gt; 0.5, beta=2.0),&lt;BR /&gt;f"{prefix}_brier_score": brier_score_loss(y_true, y_pred &amp;gt; 0.5),&lt;BR /&gt;f"{prefix}_recall": recall_score(y_true, y_pred &amp;gt; 0.5),&lt;BR /&gt;f"{prefix}_precision": precision_score(y_true, y_pred &amp;gt; 0.5),&lt;BR /&gt;}&lt;BR /&gt;for metric_name, value in metrics.items():&lt;BR /&gt;mlflow.log_metric(metric_name, value)&lt;BR /&gt;return metrics&lt;BR /&gt;&lt;BR /&gt;def evaluate_model(self):&lt;BR /&gt;"""Evaluate the model using AUC metric."""&lt;BR /&gt;y_pred_proba = self.predictor.predict_proba(self.X_train).iloc[:, -1]&lt;BR /&gt;auc_score = roc_auc_score(self.y_true, y_pred_proba)&lt;BR /&gt;print(f"Model AUC: {auc_score:.4f}")&lt;BR /&gt;return auc_score&lt;/P&gt;&lt;P&gt;def run_pipeline(self):&lt;BR /&gt;"""Complete pipeline: data generation, training, evaluation, logging, and loading."""&lt;BR /&gt;self.split_data()&lt;BR /&gt;&lt;BR /&gt;self.train_model()&lt;BR /&gt;auc_score = self.evaluate_model()&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;from ucimlrepo import fetch_ucirepo&lt;BR /&gt;&lt;BR /&gt;# fetch dataset&lt;BR /&gt;adult = fetch_ucirepo(id=2)&lt;BR /&gt;&lt;BR /&gt;# data (as pandas dataframes)&lt;BR /&gt;X = 
adult.data.features&lt;BR /&gt;y = adult.data.targets&lt;/P&gt;&lt;P&gt;data = X.copy()&lt;BR /&gt;data['income'] = y['income']&lt;BR /&gt;data.head()&lt;BR /&gt;data['income'] = data['income'].replace({'&amp;lt;=50K.': '&amp;lt;=50K', '&amp;gt;50K.': '&amp;gt;50K'})&lt;BR /&gt;data['income'] = data['income'].replace({'&amp;lt;=50K': 0, '&amp;gt;50K': 1})&lt;BR /&gt;data['income'].value_counts()&lt;/P&gt;&lt;P&gt;# Example Usage:&lt;BR /&gt;classifier = AutoGluonMLflowClassifier(model_data = data,&lt;BR /&gt;target_col = "income",&lt;BR /&gt;experiment_name = "autogluon_sample_experiment")&lt;BR /&gt;classifier.run_pipeline()&lt;/P&gt;</description>
      <pubDate>Fri, 28 Feb 2025 04:42:37 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/autogluon-mlflow-integration/m-p/111423#M3977</guid>
      <dc:creator>cleversuresh</dc:creator>
      <dc:date>2025-02-28T04:42:37Z</dc:date>
    </item>
    <item>
      <title>Re: AutoGluon MLflow integration</title>
      <link>https://community.databricks.com/t5/machine-learning/autogluon-mlflow-integration/m-p/137062#M4396</link>
      <description>&lt;P class="qt3gz91 paragraph"&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/114928"&gt;@cleversuresh&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P class="qt3gz91 paragraph"&gt;Thanks for sharing the code and the context. Here are the core issues I see and how to fix them so MLflow logging works reliably on Databricks.&lt;/P&gt;
&lt;H3 class="_7uu25p0 qt3gz9c _7pq7t612 heading3 _7uu25p1"&gt;What’s breaking MLflow logging in your code&lt;/H3&gt;
&lt;UL class="qt3gz97 qt3gz92"&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;Your &lt;STRONG&gt;PyFunc wrapper loads the AutoGluon model from a local path&lt;/STRONG&gt; rather than from the MLflow model’s packaged artifacts. In &lt;CODE class="qt3gz9f"&gt;PythonModel.load_context&lt;/CODE&gt;, you must read any files from &lt;CODE class="qt3gz9f"&gt;context.artifacts[...]&lt;/CODE&gt;. Otherwise, loading or serving the model will fail when that local path doesn’t exist in the target environment.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;The &lt;STRONG&gt;&lt;CODE class="qt3gz9f"&gt;input_example&lt;/CODE&gt; and signature inference are misaligned&lt;/STRONG&gt;. You pass &lt;CODE class="qt3gz9f"&gt;self.X_train[:2]&lt;/CODE&gt;, but &lt;CODE class="qt3gz9f"&gt;self.X_train&lt;/CODE&gt; is never defined; also &lt;CODE class="qt3gz9f"&gt;input_example&lt;/CODE&gt; must match the schema you infer with &lt;CODE class="qt3gz9f"&gt;infer_signature(model_input=..., model_output=...)&lt;/CODE&gt;. Use a small slice of &lt;CODE class="qt3gz9f"&gt;train_features&lt;/CODE&gt; (DataFrame with target dropped) for both signature and example.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;&lt;STRONG&gt;&lt;CODE class="qt3gz9f"&gt;classification_report&lt;/CODE&gt; arguments are incorrect&lt;/STRONG&gt;. It expects &lt;CODE class="qt3gz9f"&gt;y_true&lt;/CODE&gt; and &lt;CODE class="qt3gz9f"&gt;y_pred&lt;/CODE&gt; (discrete labels), but you pass &lt;CODE class="qt3gz9f"&gt;X&lt;/CODE&gt; as &lt;CODE class="qt3gz9f"&gt;y_true&lt;/CODE&gt; and rounded probabilities as &lt;CODE class="qt3gz9f"&gt;y_pred&lt;/CODE&gt;. Pass &lt;CODE class="qt3gz9f"&gt;self.val_data[self.target_col]&lt;/CODE&gt; and &lt;CODE class="qt3gz9f"&gt;(self.val_predictions &amp;gt; 0.5).astype(int)&lt;/CODE&gt; (or a tuned threshold) instead.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;&lt;STRONG&gt;&lt;CODE class="qt3gz9f"&gt;brier_score_loss&lt;/CODE&gt; expects probabilities, not thresholded predictions&lt;/STRONG&gt;. Use the raw positive-class probabilities &lt;CODE class="qt3gz9f"&gt;y_pred_proba&lt;/CODE&gt; (shape &lt;CODE class="qt3gz9f"&gt;(n_samples,)&lt;/CODE&gt;) for Brier, not &lt;CODE class="qt3gz9f"&gt;(y_pred &amp;gt; 0.5)&lt;/CODE&gt;. If you need 0–1 range, set &lt;CODE class="qt3gz9f"&gt;scale_by_half=True&lt;/CODE&gt; (binary default is usually auto).&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;&lt;STRONG&gt;&lt;CODE class="qt3gz9f"&gt;evaluate_model&lt;/CODE&gt; uses undefined attributes (&lt;CODE class="qt3gz9f"&gt;self.X_train&lt;/CODE&gt;, &lt;CODE class="qt3gz9f"&gt;self.y_true&lt;/CODE&gt;)&lt;/STRONG&gt;. Use your stored train/validation splits and compute AUC with &lt;CODE class="qt3gz9f"&gt;roc_auc_score(y_true, y_score)&lt;/CODE&gt; where &lt;CODE class="qt3gz9f"&gt;y_score&lt;/CODE&gt; are positive-class probabilities.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;The &lt;STRONG&gt;AutoGluon &lt;CODE class="qt3gz9f"&gt;path&lt;/CODE&gt; pointing to &lt;CODE class="qt3gz9f"&gt;/Shared/...&lt;/CODE&gt;&lt;/STRONG&gt; is a workspace path, not a filesystem location. Use a real local/temp directory (for example via &lt;CODE class="qt3gz9f"&gt;tempfile.mkdtemp()&lt;/CODE&gt;), then package it into MLflow model artifacts with &lt;CODE class="qt3gz9f"&gt;artifacts={"ag_predictor": &amp;lt;local_dir&amp;gt;}&lt;/CODE&gt; and load with &lt;CODE class="qt3gz9f"&gt;context.artifacts[...]&lt;/CODE&gt; in your PyFunc.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;Make sure to &lt;STRONG&gt;set the MLflow experiment to a workspace path&lt;/STRONG&gt; (like &lt;CODE class="qt3gz9f"&gt;/Shared/...&lt;/CODE&gt;), which is supported on Databricks; if you want artifacts stored in UC Volumes, create the experiment with a UC volume artifact location.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;Finally, ensure &lt;STRONG&gt;runtime dependencies&lt;/STRONG&gt; (AutoGluon + its model backends, e.g., LightGBM, XGBoost, CatBoost) are present when loading/serving the model. Use &lt;CODE class="qt3gz9f"&gt;conda_env&lt;/CODE&gt; or &lt;CODE class="qt3gz9f"&gt;extra_pip_requirements&lt;/CODE&gt; in &lt;CODE class="qt3gz9f"&gt;mlflow.pyfunc.log_model&lt;/CODE&gt; so MLflow reproduces the environment cleanly.&lt;/P&gt;
&lt;/LI&gt;
&lt;/UL&gt;
&lt;H3 class="_7uu25p0 qt3gz9c _7pq7t612 heading3 _7uu25p1"&gt;Code patches&lt;/H3&gt;
&lt;H4&gt;1) Fix the PyFunc wrapper to read from packaged artifacts&lt;/H4&gt;
&lt;LI-CODE lang="python"&gt;import mlflow
import pandas as pd
from mlflow.pyfunc import PythonModel
from autogluon.tabular import TabularPredictor

class AutoGluonPyFuncWrapper(PythonModel):
    """Wrapper for AutoGluon model to be logged as a PyFunc model in MLflow."""

    def __init__(self):
        self.predictor = None

    def load_context(self, context):
        # Load the predictor directory that was logged as an artifact
        predictor_dir = context.artifacts["ag_predictor"]
        self.predictor = TabularPredictor.load(predictor_dir)

    def predict(self, context, model_input):
        # Accept dict/list; convert to DataFrame
        if not isinstance(model_input, pd.DataFrame):
            model_input = pd.DataFrame(model_input)

        # Probability of the positive class
        proba_df = self.predictor.predict_proba(model_input)

        # Choose positive label robustly (prefer 1 if present)
        class_labels = list(proba_df.columns)
        pos_label = 1 if 1 in class_labels else class_labels[-1]
        return proba_df[pos_label]  # Pandas Series of positive-class probabilities&lt;/LI-CODE&gt;
&lt;H4&gt;2) Log AutoGluon predictor directory as an MLflow artifact and align the signature&lt;/H4&gt;
&lt;LI-CODE lang="python"&gt;import tempfile
import mlflow
from mlflow.models.signature import infer_signature

# Choose a real local directory for AutoGluon training output
local_model_dir = tempfile.mkdtemp(prefix="ag_predictor_")

with mlflow.start_run() as run:
    # Train AutoGluon
    self.predictor = TabularPredictor(
        problem_type="binary",
        label=self.target_col,
        eval_metric="roc_auc",
        path=local_model_dir
    ).fit(
        self.train_data,
        excluded_model_types=["KNN", "RF"],
        hyperparameters=hyperparameters,
        presets="best_quality",
        num_bag_folds=3,
        num_stack_levels=1,
        time_limit=time_limit,
        verbosity=1,
        num_cpus=4,
        num_gpus=0,
        ag_args_fit={"num_cpus": 1, "num_gpus": 0}
    )

    # Compute train/val probabilities for metrics
    train_X = self.train_data.drop(columns=[self.target_col])
    val_X = self.val_data.drop(columns=[self.target_col])
    self.train_predictions = self.predictor.predict_proba(train_X).iloc[:, -1]
    self.val_predictions = self.predictor.predict_proba(val_X).iloc[:, -1]

    # Metrics (see patch 3 below)
    self.compute_metrics(self.train_data[self.target_col], self.train_predictions, "train")
    self.compute_metrics(self.val_data[self.target_col], self.val_predictions, "validation")

    # Signature and input_example must match the wrapper’s input/output
    input_example = train_X.head(2)
    signature = infer_signature(model_input=input_example, model_output=self.train_predictions.head(2))

    # Log PyFunc model and the trained predictor directory as artifact
    mlflow.pyfunc.log_model(
        artifact_path="model",
        python_model=AutoGluonPyFuncWrapper(),
        artifacts={"ag_predictor": local_model_dir},
        signature=signature,
        input_example=input_example,
        # Strongly recommended: pin pip requirements to include AutoGluon &amp;amp; backends
        extra_pip_requirements=[
            "mlflow&amp;gt;=2.8.0",  # adjust to your workspace runtime
            "autogluon.tabular&amp;gt;=1.1.0",  # pin your version
            "xgboost&amp;gt;=1.7.0",
            "lightgbm&amp;gt;=3.3.5",
            "catboost&amp;gt;=1.2"
        ],
    )

    self.run_id = run.info.run_id&lt;/LI-CODE&gt;
&lt;H4&gt;3) Correct your metric logging&lt;/H4&gt;
&lt;LI-CODE lang="python"&gt;from sklearn.metrics import (
    roc_auc_score,
    average_precision_score,
    f1_score,
    fbeta_score,
    brier_score_loss,
    recall_score,
    precision_score,
    classification_report
)

def compute_metrics(self, y_true, y_pred_proba, prefix):
    # y_pred_proba: probabilities of positive class
    y_pred_bin = (y_pred_proba &amp;gt; 0.5).astype(int)

    metrics = {
        f"{prefix}_auc": roc_auc_score(y_true, y_pred_proba),
        f"{prefix}_average_precision": average_precision_score(y_true, y_pred_proba),
        f"{prefix}_f1_score": f1_score(y_true, y_pred_bin),
        f"{prefix}_f2_score": fbeta_score(y_true, y_pred_bin, beta=2.0),
        f"{prefix}_brier_score": brier_score_loss(y_true, y_pred_proba),
        f"{prefix}_recall": recall_score(y_true, y_pred_bin),
        f"{prefix}_precision": precision_score(y_true, y_pred_bin),
    }
    for k, v in metrics.items():
        mlflow.log_metric(k, float(v))
    return metrics

def log_classification_report(self):
    # Use validation set labels and thresholded predictions
    y_true = self.val_data[self.target_col]
    y_pred_bin = (self.val_predictions &amp;gt; 0.5).astype(int)
    report = classification_report(y_true, y_pred_bin, output_dict=True)
    mlflow.log_dict(report, "classification_report.json")&lt;/LI-CODE&gt;
&lt;H4&gt;4) Fix evaluate_model to use your stored splits&lt;/H4&gt;
&lt;LI-CODE lang="python"&gt;def evaluate_model(self):
    # Use the validation set probabilities already computed
    auc_score = roc_auc_score(self.val_data[self.target_col], self.val_predictions)
    print(f"Model AUC (validation): {auc_score:.4f}")
    return auc_score&lt;/LI-CODE&gt;
&lt;H3 class="_7uu25p0 qt3gz9c _7pq7t612 heading3 _7uu25p1"&gt;A couple of Databricks-specific practices to keep this robust&lt;/H3&gt;
&lt;UL class="qt3gz97 qt3gz92"&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;&lt;STRONG&gt;Set the workspace experiment path once&lt;/STRONG&gt; (recommended):&lt;BR /&gt;&lt;CODE class="qt3gz9f"&gt;mlflow.set_experiment(f"/Shared/automl_experiments/{self.experiment_name}")&lt;/CODE&gt;. If you want to store artifacts in UC Volumes, create the experiment with an artifact location at a UC Volume path first, then set it active by path.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;&lt;STRONG&gt;Package all runtime deps&lt;/STRONG&gt; with the model (pip/conda), especially AutoGluon and its tree learners. You can use &lt;CODE class="qt3gz9f"&gt;extra_pip_requirements&lt;/CODE&gt; (shown above) or supply a &lt;CODE class="qt3gz9f"&gt;conda_env&lt;/CODE&gt; dict if you prefer hard pinning Python and Conda channels.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;&lt;STRONG&gt;Always load files via &lt;CODE class="qt3gz9f"&gt;context.artifacts[...]&lt;/CODE&gt; in &lt;CODE class="qt3gz9f"&gt;load_context&lt;/CODE&gt;&lt;/STRONG&gt;. MLflow will download artifacts next to the model and pass you local paths at runtime; don’t assume workspace or DBFS paths exist when the model is rehydrated.&lt;/P&gt;
&lt;/LI&gt;
&lt;LI class="qt3gz9a"&gt;
&lt;P class="qt3gz91 paragraph"&gt;&lt;STRONG&gt;Align &lt;CODE class="qt3gz9f"&gt;input_example&lt;/CODE&gt; with your signature&lt;/STRONG&gt; and wrapper input type (DataFrame rows of features). Signature/&lt;CODE class="qt3gz9f"&gt;input_example&lt;/CODE&gt; improves handoff, validation, and serving.&lt;/P&gt;
&lt;/LI&gt;
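To make the experiment-path and dependency-pinning points above concrete, here is a minimal sketch; the experiment name and every version pin are illustrative assumptions, not requirements:

```python
# Sketch: a conda_env dict that hard-pins the Python version and channels,
# as an alternative to extra_pip_requirements (all versions illustrative).
conda_env = {
    "name": "ag_serving_env",
    "channels": ["conda-forge"],
    "dependencies": [
        "python=3.10",
        "pip",
        {
            "pip": [
                "mlflow==2.8.0",
                "autogluon.tabular==1.1.0",
                "xgboost==1.7.6",
                "lightgbm==3.3.5",
                "catboost==1.2",
            ]
        },
    ],
}

# Workspace experiment path, set once per session (name is hypothetical):
experiment_path = "/Shared/automl_experiments/ag_binary_demo"
# mlflow.set_experiment(experiment_path)
# mlflow.pyfunc.log_model(..., conda_env=conda_env)  # instead of extra_pip_requirements
```

Pass either `conda_env` or `extra_pip_requirements` to `mlflow.pyfunc.log_model`, not both; the conda route is the one to take when you also need to control the Python version at serving time.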
&lt;/UL&gt;</description>
      <pubDate>Fri, 31 Oct 2025 15:57:11 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/autogluon-mlflow-integration/m-p/137062#M4396</guid>
      <dc:creator>stbjelcevic</dc:creator>
      <dc:date>2025-10-31T15:57:11Z</dc:date>
    </item>
  </channel>
</rss>

