Unable to register Scikit-learn or XGBoost model to Unity Catalog

AlkaSaliss
New Contributor II

Hello, I'm following the tutorial at https://docs.databricks.com/aws/en/notebooks/source/mlflow/mlflow-classic-ml-e2e-mlflow-3.html, which covers the ML model management process using MLflow, in a Unity Catalog-enabled workspace. However, I'm hitting an error that I'm unable to understand or fix.

The error occurs specifically in section 4 of the tutorial, "Log the model using MLflow":

# Incorporate MLflow evaluation
evaluation_data = X_test.copy()
evaluation_data["label"] = y_test

# Log the model and training metadata results
with mlflow.start_run(run_name="my_xgboost_regression") as run:
    # Extract metrics
    final_train_rmse = np.array(reg.evals_result()["validation_0"]["rmse"])[-1]
    final_test_rmse = np.array(reg.evals_result()["validation_1"]["rmse"])[-1]

    # Extract parameters for logging
    feature_map = {key: value for key, value in reg.get_xgb_params().items() if value is not None}

    # Generate a model signature using the infer_signature utility in MLflow
    # A signature is required to register the model to Unity Catalog 
    # so that the model can be used in SQL queries
    signature = infer_signature(X, reg.predict(X))

    # Log parameters
    mlflow.log_params(feature_map)

    # Log the model to MLflow and register the model to Unity Catalog
    # All model metrics and parameters will be available in Unity Catalog
    model_info = mlflow.xgboost.log_model(
        xgb_model=reg,
        name="xgboost_regression_model",
        input_example=X.iloc[[0]],
        signature=signature,
        registered_model_name="my_catalog.my_schema.xgboost_regression_model",
    )

Here's the stack trace of the model logging error:

🔗 View Logged Model at: https://dbc-my-ws-account.cloud.databricks.com/ml/experiments/2706846431648976/models/m-782e22e859f24a63a29e0d9ee9227a6a?o=1366243967608557
/local_disk0/.ephemeral_nfs/envs/pythonEnv-ea9e38f3-62ca-47d0-b399-bd069dfe7414/lib/python3.12/site-packages/xgboost/sklearn.py:1028: UserWarning: [08:35:04] WARNING: /workspace/src/c_api/c_api.cc:1427: Saving model in the UBJSON format as default.  You can use file extension: `json`, `ubj` or `deprecated` to choose between formats.
  self.get_booster().save_model(fname)
Successfully registered model 'my_catalog.my_schema.xgboost_regression_model'.

MlflowException: The following failures occurred while uploading one or more artifacts to s3://my-s3-catalog-bucket/models/95d12042-52cb-4348-a05f-4bbdfe0c71f1/versions/814a5856-83df-4ed2-b4df-dc3dfc5c34ad: {'/local_disk0/repl_tmp_data/ReplId-198e0-2fc1c-a/tmp4w84_er0/model/serving_input_example.json': "JSONDecodeError('Expecting value: line 1 column 1 (char 0)')", '/local_disk0/repl_tmp_data/ReplId-198e0-2fc1c-a/tmp4w84_er0/model/conda.yaml': "JSONDecodeError('Expecting value: line 1 column 1 (char 0)')", '/local_disk0/repl_tmp_data/ReplId-198e0-2fc1c-a/tmp4w84_er0/model/requirements.txt': "JSONDecodeError('Expecting value: line 1 column 1 (char 0)')", '/local_disk0/repl_tmp_data/ReplId-198e0-2fc1c-a/tmp4w84_er0/model/MLmodel': "JSONDecodeError('Expecting value: line 1 column 1 (char 0)')", '/local_disk0/repl_tmp_data/ReplId-198e0-2fc1c-a/tmp4w84_er0/model/input_example.json': "JSONDecodeError('Expecting value: line 1 column 1 (char 0)')", '/local_disk0/repl_tmp_data/ReplId-198e0-2fc1c-a/tmp4w84_er0/model/model.xgb': "JSONDecodeError('Expecting value: line 1 column 1 (char 0)')", '/local_disk0/repl_tmp_data/ReplId-198e0-2fc1c-a/tmp4w84_er0/model/python_env.yaml': "JSONDecodeError('Expecting value: line 1 column 1 (char 0)')", '/local_disk0/repl_tmp_data/ReplId-198e0-2fc1c-a/tmp4w84_er0/model/metadata/conda.yaml': "JSONDecodeError('Expecting value: line 1 column 1 (char 0)')", '/local_disk0/repl_tmp_data/ReplId-198e0-2fc1c-a/tmp4w84_er0/model/metadata/requirements.txt': "JSONDecodeError('Expecting value: line 1 column 1 (char 0)')", '/local_disk0/repl_tmp_data/ReplId-198e0-2fc1c-a/tmp4w84_er0/model/metadata/MLmodel': "JSONDecodeError('Expecting value: line 1 column 1 (char 0)')", '/local_disk0/repl_tmp_data/ReplId-198e0-2fc1c-a/tmp4w84_er0/model/metadata/python_env.yaml': "JSONDecodeError('Expecting value: line 1 column 1 (char 0)')"}
File <command-2706846431648989>, line 24
     20 mlflow.log_params(feature_map)
     22 # Log the model to MLflow and register the model to Unity Catalog
     23 # All model metrics and parameters will be available in Unity Catalog
---> 24 model_info = mlflow.xgboost.log_model(
     25     xgb_model=reg,
     26     name="xgboost_regression_model",
     27     input_example=X.iloc[[0]],
     28     signature=signature,
     29     registered_model_name="my_catalog.my_schema.xgboost_regression_model",
     30 )
     32 # Log metrics to the run and model
     33 mlflow.log_metric("train_rmse", final_train_rmse)
File <command-2706846431648989>, line 24
     20 mlflow.log_params(feature_map)
     22 # Log the model to MLflow and register the model to Unity Catalog
     23 # All model metrics and parameters will be available in Unity Catalog
---> 24 model_info = mlflow.xgboost.log_model(
     25     xgb_model=reg,
     26     name="xgboost_regression_model",
     27     input_example=X.iloc[[0]],
     28     signature=signature,
     29     registered_model_name="my_catalog.my_schema.xgboost_regression_model",
     30 )
     32 # Log metrics to the run and model
     33 mlflow.log_metric("train_rmse", final_train_rmse)
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-ea9e38f3-62ca-47d0-b399-bd069dfe7414/lib/python3.12/site-packages/mlflow/xgboost/__init__.py:279, in log_model(xgb_model, artifact_path, conda_env, code_paths, registered_model_name, signature, input_example, await_registration_for, pip_requirements, extra_pip_requirements, model_format, metadata, name, params, tags, model_type, step, model_id, **kwargs)
    225 @format_docstring(LOG_MODEL_PARAM_DOCS.format(package_name=FLAVOR_NAME))
    226 def log_model(
    227     xgb_model,
   (...)
    245     **kwargs,
    246 ):
    247     """Log an XGBoost model as an MLflow artifact for the current run.
    248 
    249     Args:
   (...)
    277         metadata of the logged model.
    278     """
--> 279     return Model.log(
    280         artifact_path=artifact_path,
    281         name=name,
    282         flavor=mlflow.xgboost,
    283         registered_model_name=registered_model_name,
    284         xgb_model=xgb_model,
    285         model_format=model_format,
    286         conda_env=conda_env,
    287         code_paths=code_paths,
    288         signature=signature,
    289         input_example=input_example,
    290         await_registration_for=await_registration_for,
    291         pip_requirements=pip_requirements,
    292         extra_pip_requirements=extra_pip_requirements,
    293         metadata=metadata,
    294         params=params,
    295         tags=tags,
    296         model_type=model_type,
    297         step=step,
    298         model_id=model_id,
    299         **kwargs,
    300     )
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-ea9e38f3-62ca-47d0-b399-bd069dfe7414/lib/python3.12/site-packages/mlflow/models/model.py:1357, in Model.log(cls, artifact_path, flavor, registered_model_name, await_registration_for, metadata, run_id, resources, auth_policy, prompts, name, model_type, params, tags, step, model_id, **kwargs)
   1354             _logger.warning("Failed to log model config as params: %s", str(e))
   1356 if registered_model_name is not None:
-> 1357     registered_model = mlflow.tracking._model_registry.fluent._register_model(
   1358         f"models:/{model.model_id}",
   1359         registered_model_name,
   1360         await_registration_for=await_registration_for,
   1361         local_model_path=local_path,
   1362     )
   1363 model_info = mlflow_model.get_model_info(model)
   1364 if registered_model is not None:
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-ea9e38f3-62ca-47d0-b399-bd069dfe7414/lib/python3.12/site-packages/mlflow/tracking/_model_registry/fluent.py:213, in _register_model(model_uri, name, await_registration_for, tags, local_model_path, env_pack)
    207     with pack_env_for_databricks_model_serving(
    208         model_uri,
    209         enforce_pip_requirements=True,
    210     ) as artifacts_path_with_env:
    211         client.log_model_artifacts(model_id, artifacts_path_with_env)
--> 213 create_version_response = client._create_model_version(
    214     name=name,
    215     source=source,
    216     run_id=run_id,
    217     tags=tags,
    218     await_creation_for=await_registration_for,
    219     local_model_path=local_model_path,
    220     model_id=model_id,
    221 )
    222 created_message = (
    223     f"Created version '{create_version_response.version}' of model "
    224     f"'{create_version_response.name}'"
    225 )
    226 # Print a link to the UC model version page if the model is in UC.
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-ea9e38f3-62ca-47d0-b399-bd069dfe7414/lib/python3.12/site-packages/mlflow/tracking/client.py:4198, in MlflowClient._create_model_version(self, name, source, run_id, tags, run_link, description, await_creation_for, local_model_path, model_id)
   4195         # models:/<model_id> source is not supported by WSMR
   4196         new_source = logged_model.artifact_location
-> 4198 return self._get_registry_client().create_model_version(
   4199     name=name,
   4200     source=new_source,
   4201     run_id=run_id,
   4202     tags=tags,
   4203     run_link=run_link,
   4204     description=description,
   4205     await_creation_for=await_creation_for,
   4206     local_model_path=local_model_path,
   4207     model_id=model_id,
   4208 )
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-ea9e38f3-62ca-47d0-b399-bd069dfe7414/lib/python3.12/site-packages/mlflow/telemetry/track.py:22, in record_usage_event.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
     19 @functools.wraps(func)
     20 def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
     21     if is_telemetry_disabled() or _is_telemetry_disabled_for_event(event):
---> 22         return func(*args, **kwargs)
     24     success = True
     25     start_time = time.time()
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-ea9e38f3-62ca-47d0-b399-bd069dfe7414/lib/python3.12/site-packages/mlflow/tracking/_model_registry/client.py:266, in ModelRegistryClient.create_model_version(self, name, source, run_id, tags, run_link, description, await_creation_for, local_model_path, model_id)
    264 arg_names = _get_arg_names(self.store.create_model_version)
    265 if "local_model_path" in arg_names:
--> 266     mv = self.store.create_model_version(
    267         name,
    268         source,
    269         run_id,
    270         tags,
    271         run_link,
    272         description,
    273         local_model_path=local_model_path,
    274         model_id=model_id,
    275     )
    276 else:
    277     # Fall back to calling create_model_version without
    278     # local_model_path since old model registry store implementations may not
    279     # support the local_model_path argument.
    280     mv = self.store.create_model_version(
    281         name, source, run_id, tags, run_link, description, model_id=model_id
    282     )
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-ea9e38f3-62ca-47d0-b399-bd069dfe7414/lib/python3.12/site-packages/mlflow/store/_unity_catalog/registry/rest_store.py:983, in UcModelRegistryStore.create_model_version(self, name, source, run_id, tags, run_link, description, local_model_path, model_id)
    978 model_version = self._call_endpoint(
    979     CreateModelVersionRequest, req_body, extra_headers=extra_headers
    980 ).model_version
    982 store = self._get_artifact_repo(model_version, full_name)
--> 983 store.log_artifacts(local_dir=local_model_dir, artifact_path="")
    984 finalized_mv = self._finalize_model_version(
    985     name=full_name, version=model_version.version
    986 )
    987 return model_version_from_uc_proto(finalized_mv)
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-ea9e38f3-62ca-47d0-b399-bd069dfe7414/lib/python3.12/site-packages/mlflow/store/artifact/cloud_artifact_repo.py:168, in CloudArtifactRepository.log_artifacts(self, local_dir, artifact_path)
    165             failed_uploads[src_file_path] = repr(e)
    167 if len(failed_uploads) > 0:
--> 168     raise MlflowException(
    169         message=(
    170             "The following failures occurred while uploading one or more artifacts"
    171             f" to {self.artifact_uri}: {failed_uploads}"
    172         )
    173     )