<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Not able to log xgboost model to mlflow in Machine Learning</title>
    <link>https://community.databricks.com/t5/machine-learning/not-able-to-log-xgboost-model-to-mlflow/m-p/38252#M1992</link>
    <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/84729"&gt;@raghagra&lt;/a&gt;,&lt;/P&gt;&lt;P&gt;Can you try the following code instead (please modify it according to your needs) to log the model:&lt;/P&gt;&lt;PRE&gt;import mlflow

with mlflow.start_run(experiment_id="1234") as run:
    mlflow.set_tag("status", "started")
    mlflow.log_param("git_hash", "1234")
    mlflow.log_param("env", "stg")
    mlflow.log_param("pipeline_id", "es_s3_to_raw")

    run_id = run.info.run_uuid
    mlflow.log_param("run_id", run_id)

    mlflow.set_tag("run_url", "&lt;A href="https://iterable-stg1-eu.cloud.databricks.com/?o=1290038165324274#job/219382284214852/run/1726769" target="_blank" rel="noopener noreferrer"&gt;URL&amp;nbsp;of&amp;nbsp;your&amp;nbsp;model&lt;/A&gt;")
    mlflow.log_param("id_in_job", "1726769")
    mlflow.log_param("context.user", "your email id")&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Mon, 24 Jul 2023 07:55:02 GMT</pubDate>
    <dc:creator>Kumaran</dc:creator>
    <dc:date>2023-07-24T07:55:02Z</dc:date>
    <item>
      <title>Not able to log xgboost model to mlflow</title>
      <link>https://community.databricks.com/t5/machine-learning/not-able-to-log-xgboost-model-to-mlflow/m-p/37957#M1968</link>
      <description>&lt;P&gt;I have been trying to log an MLflow model, but it does not seem to be working. It logs only the last run (which is also the worst run).&lt;/P&gt;&lt;LI-CODE lang="python"&gt;#-------------------------------------------------------13.0 ML XGBOost-------------------------------------------------------------------------
#train_df=train_df.limit(188123)

from hyperopt import fmin, tpe, Trials, hp
import numpy as np
import mlflow
import mlflow.spark





from pyspark.ml.feature import StringIndexer, VectorAssembler
from pyspark.ml import Pipeline
from xgboost.spark import SparkXGBRegressor
from pyspark.ml.evaluation import RegressionEvaluator
import numpy as np
from mlflow.models.signature import infer_signature
#vec_assembler = VectorAssembler(inputCols=train_df.columns[1:], outputCol="features")

xgb = SparkXGBRegressor(num_workers=1, label_col="price", missing=0.0)
# pipeline = Pipeline(stages=[vec_assembler, xgb])
pipeline = Pipeline(stages=[ordinal_encoder, vec_assembler, xgb])
regression_evaluator = RegressionEvaluator(predictionCol="prediction", labelCol="price")

def objective_function(params):    
    # set the hyperparameters that we want to tune
    max_depth = params["max_depth"]
    n_estimators = params["n_estimators"]

    with mlflow.start_run(nested=True):
        estimator = pipeline.copy({xgb.max_depth: max_depth, xgb.n_estimators: n_estimators})
        model = estimator.fit(train_df)

        preds = model.transform(test_df)
        rmse = regression_evaluator.evaluate(preds)
        #r2 = regression_evaluator.setMetricName("r2").evaluate(preds)
        mlflow.log_metric("rmse", rmse)
        #mlflow.log_metric("r2", r2)

    return rmse


search_space = {
    "max_depth" : hp.choice('max_depth', np.arange(12, 15, dtype=int)),
     "n_estimators": hp.choice('n_estimators', np.arange(50, 80, dtype=int))
}


mlflow.pyspark.ml.autolog(log_models=True,log_datasets=False)
#mlflow.sklearn.autolog(log_models=False,log_datasets=False)
#mlflow.xgboost.autolog(log_models=True)
#mlflow.transformers.autolog(log_models=False)

num_evals = 1
trials = Trials()
best_hyperparam = fmin(fn=objective_function, 
                       space=search_space,
                       algo=tpe.suggest, 
                       max_evals=num_evals,
                       trials=trials,
                       rstate=np.random.default_rng(42))

# Retrain model on train &amp;amp; validation dataset and evaluate on test dataset
with mlflow.start_run():
    best_max_depth = best_hyperparam["max_depth"]
    best_n_estimators = best_hyperparam["n_estimators"]
    estimator = pipeline.copy({xgb.max_depth: best_max_depth, xgb.n_estimators: best_n_estimators})
    #combined_df = train_df.union(test_df) # Combine train &amp;amp; validation together

    pipeline_model = estimator.fit(train_df)
    pred_df = pipeline_model.transform(test_df)
    #signature = infer_signature(test_df, pred_df)
    rmse = regression_evaluator.evaluate(pred_df)
    r2 = regression_evaluator.setMetricName("r2").evaluate(pred_df)

    # Log param and metrics for the final model
    mlflow.log_param("maxdepth", best_max_depth)
    mlflow.log_param("n_estimators", best_n_estimators)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    # mlflow.transformers.log_model(pipeline_model,"model",input_example=test_df.select(old_cols_list).limit(1).toPandas())
    mlflow.spark.log_model(pipeline_model ,"model",input_example=test_df.select(old_cols_list).limit(1).toPandas())
    #mlflow.xgboost.log_model(pipeline_model,"model",input_example=test_df.select(old_cols_list).limit(1).toPandas())
    # mlflow.sklearn.log_model(pipeline_model,"model",input_example=test_df.select(old_cols_list).limit(1).toPandas())
 &lt;/LI-CODE&gt;</description>
      <pubDate>Wed, 19 Jul 2023 12:15:27 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/not-able-to-log-xgboost-model-to-mlflow/m-p/37957#M1968</guid>
      <dc:creator>raghagra</dc:creator>
      <dc:date>2023-07-19T12:15:27Z</dc:date>
    </item>
    <item>
      <title>Re: Not able to log xgboost model to mlflow</title>
      <link>https://community.databricks.com/t5/machine-learning/not-able-to-log-xgboost-model-to-mlflow/m-p/38185#M1982</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/84729"&gt;@raghagra&lt;/a&gt;,&lt;/P&gt;&lt;P&gt;Thank you for posting your question in the Databricks community.&lt;/P&gt;&lt;P&gt;The code is only logging the last run because you are calling mlflow.start_run() inside objective_function(). This means that each time objective_function() is called, it starts a new run. The mlflow.spark.log_model() function only logs the model for the current run, so the model will only be logged for the last run.&lt;/P&gt;&lt;P&gt;To fix this, you can move the mlflow.start_run() call outside of objective_function(). This will ensure that the model is logged for every run.&lt;/P&gt;&lt;P&gt;Please try this and let us know how it works.&lt;/P&gt;</description>
      <pubDate>Fri, 21 Jul 2023 19:45:27 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/not-able-to-log-xgboost-model-to-mlflow/m-p/38185#M1982</guid>
      <dc:creator>Kumaran</dc:creator>
      <dc:date>2023-07-21T19:45:27Z</dc:date>
    </item>
    <item>
      <title>Re: Not able to log xgboost model to mlflow</title>
      <link>https://community.databricks.com/t5/machine-learning/not-able-to-log-xgboost-model-to-mlflow/m-p/38196#M1988</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/63081"&gt;@Kumaran&lt;/a&gt;&amp;nbsp;It still did not work. I am getting the error below:&lt;BR /&gt;&lt;SPAN&gt;2023/07/22 11:30:21 INFO mlflow.spark: Inferring pip requirements by reloading the logged model from the databricks artifact repository, which can be time-consuming. To speed up, explicitly specify the conda_env or pip_requirements when calling log_model(). 2023/07/22 11:31:02 WARNING mlflow.utils.environment: Encountered an unexpected error while inferring pip requirements (model URI: dbfs:/databricks/mlflow-tracking/590967242928602/e3bd64c64535425192a510bd4ee66dec/artifacts/xgb_model/sparkml, flavor: spark), fall back to return ['pyspark==3.4.0']. Set logging level to DEBUG to see the full traceback. /databricks/python/lib/python3.10/site-packages/_distutils_hack/__init__.py:33: UserWarning: Setuptools is replacing distutils. warnings.warn("Setuptools is replacing distutils.")&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 22 Jul 2023 11:40:31 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/not-able-to-log-xgboost-model-to-mlflow/m-p/38196#M1988</guid>
      <dc:creator>raghagra</dc:creator>
      <dc:date>2023-07-22T11:40:31Z</dc:date>
    </item>
    <item>
      <title>Re: Not able to log xgboost model to mlflow</title>
      <link>https://community.databricks.com/t5/machine-learning/not-able-to-log-xgboost-model-to-mlflow/m-p/38252#M1992</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/84729"&gt;@raghagra&lt;/a&gt;,&lt;/P&gt;&lt;P&gt;Can you try the following code instead (please modify it according to your needs) to log the model:&lt;/P&gt;&lt;PRE&gt;import mlflow

with mlflow.start_run(experiment_id="1234") as run:
    mlflow.set_tag("status", "started")
    mlflow.log_param("git_hash", "1234")
    mlflow.log_param("env", "stg")
    mlflow.log_param("pipeline_id", "es_s3_to_raw")

    run_id = run.info.run_uuid
    mlflow.log_param("run_id", run_id)

    mlflow.set_tag("run_url", "&lt;A href="https://iterable-stg1-eu.cloud.databricks.com/?o=1290038165324274#job/219382284214852/run/1726769" target="_blank" rel="noopener noreferrer"&gt;URL&amp;nbsp;of&amp;nbsp;your&amp;nbsp;model&lt;/A&gt;")
    mlflow.log_param("id_in_job", "1726769")
    mlflow.log_param("context.user", "your email id")&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 24 Jul 2023 07:55:02 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/not-able-to-log-xgboost-model-to-mlflow/m-p/38252#M1992</guid>
      <dc:creator>Kumaran</dc:creator>
      <dc:date>2023-07-24T07:55:02Z</dc:date>
    </item>
    <item>
      <title>Re: Not able to log xgboost model to mlflow</title>
      <link>https://community.databricks.com/t5/machine-learning/not-able-to-log-xgboost-model-to-mlflow/m-p/38773#M2009</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/63081"&gt;@Kumaran&lt;/a&gt;&amp;nbsp;I ran this code, but is there any specific log that I should be looking for?&lt;/P&gt;</description>
      <pubDate>Mon, 31 Jul 2023 10:23:20 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/not-able-to-log-xgboost-model-to-mlflow/m-p/38773#M2009</guid>
      <dc:creator>raghagra</dc:creator>
      <dc:date>2023-07-31T10:23:20Z</dc:date>
    </item>
  </channel>
</rss>

