I'm running the following Python code from one of the Databricks training materials.
import mlflow
import mlflow.spark
import pyspark
from pyspark.ml import Pipeline
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.regression import LinearRegression
# Train a single-feature linear regression and track it as an MLflow run.
# NOTE: the pasted snippet had lost the indentation of the `with` body, which
# is a SyntaxError in Python — restored here.
with mlflow.start_run(run_name="LR-Single-Feature") as run:
    # Define pipeline: assemble the "bedrooms" column into a feature vector,
    # then fit a linear regression predicting "price".
    vec_assembler = VectorAssembler(inputCols=["bedrooms"], outputCol="features")
    lr = LinearRegression(featuresCol="features", labelCol="price")
    pipeline = Pipeline(stages=[vec_assembler, lr])
    # train_df is assumed to be a Spark DataFrame defined earlier — TODO confirm
    pipeline_model = pipeline.fit(train_df)

    # Log parameters
    mlflow.log_param("label", "price")
    mlflow.log_param("features", "bedrooms")

    # Log model. Supplying pip_requirements explicitly skips MLflow's
    # requirements-inference step — that inference step is what emits the
    # "Encountered an unexpected error while inferring pip requirements"
    # warning. The fallback MLflow chose (a pinned pyspark version) is the
    # same thing we pin here, so the warning was harmless; being explicit
    # simply silences it and makes the logged environment deterministic.
    mlflow.spark.log_model(
        pipeline_model,
        "model",
        input_example=train_df.limit(5).toPandas(),
        pip_requirements=[f"pyspark=={pyspark.__version__}"],
    )
The last line of code, `mlflow.spark.log_model(pipeline_model, "model", input_example=train_df.limit(5).toPandas())`, produced the following warning.
WARNING mlflow.utils.environment: Encountered an unexpected error while inferring pip requirements (model URI: /tmp/tmpchgj6je8, flavor: spark), fall back to return ['pyspark==3.3.0']. Set logging level to DEBUG to see the full traceback.
Can anyone explain the cause of this and suggest a way to fix it? Thanks very much!