I am getting a strange error when trying to log an XGBoost model using the Feature Engineering API.
I was able to log the model with the classic mlflow.xgboost.log_model() without any issues, but after switching to the approach recommended for the Feature Store I can no longer log it correctly.
This is the error I get, which is quite odd because I am not passing any code_path argument:
TypeError: log_model() got an unexpected keyword argument 'code_path'
I am using:
- Runtime: 15.4.x-cpu-ml-scala2.12
- mlflow: '3.1.4'
- databricks-feature-store: '0.17.0'
- xgboost: '3.0.2'
Below is the code (I believe I'm following all the right steps per the documentation):
# Train an XGBoost classifier on a Unity Catalog feature table and log it
# through the Feature Engineering client so the feature lookups are packaged
# with the model.

# Track runs in the Databricks workspace and use the "databricks-uc" registry.
mlflow.set_tracking_uri("databricks")
mlflow.set_registry_uri("databricks-uc")

# Read feature store from Unity Catalog
feature_store_name = "catalog.schema.name"
fe = FeatureEngineeringClient()
df_spark = fe.read_table(name=feature_store_name)

seed = 42
target = 'y'
features = ["list of features to use"]  # placeholder for the real feature names
columns_to_keep = ['CustomerID', target]
# Keep only the lookup key and the label; the feature columns are requested
# through the FeatureLookup below.
df_spark = df_spark.select(columns_to_keep)

# Create feature lookup
feature_lookups = [
    FeatureLookup(
        table_name=feature_store_name,
        feature_names=features,
        lookup_key=['CustomerID']
    )
]

# Create a training set
training_set = fe.create_training_set(
    df=df_spark,
    feature_lookups=feature_lookups,
    label=target,
    exclude_columns=['CustomerID']
)

# Loading the training df
training_df = training_set.load_df()

# Split the data into training and test sets
training_df, test_df = training_df.randomSplit([0.8, 0.2], seed=seed)
training_df = training_df.toPandas()
test_df = test_df.toPandas()

# Everything below must be indented under the `with` so that training and
# model logging happen inside the active MLflow run.
with mlflow.start_run() as run:
    xgb_clf = XGBClassifier(
        colsample_bytree=0.05,
        max_depth=3,
        max_leaves=20,
        eta=0.1,
        n_estimators=10,
        seed=seed
    )
    xgb_clf_model = xgb_clf.fit(training_df[features], training_df[target])

    # prediction (probability of the positive class)
    train_pred = xgb_clf_model.predict_proba(training_df[features])[:, 1]
    test_pred = xgb_clf_model.predict_proba(test_df[features])[:, 1]

    # score
    auc_train = roc_auc_score(training_df[target], train_pred)
    auc_test = roc_auc_score(test_df[target], test_pred)

    # Log model
    # NOTE(review): no `code_path` argument is passed here, so the reported
    # "unexpected keyword argument 'code_path'" is presumably raised inside the
    # client library when it forwards to MLflow. This looks like a version
    # mismatch between the installed feature store client and MLflow 3.x;
    # confirm against the client's release notes and align the versions.
    fe.log_model(
        model=xgb_clf_model.get_booster(),
        artifact_path="model",
        flavor=mlflow.xgboost,
        training_set=training_set
    )