The code execution gets stuck as soon as the evaluation of the data starts.
import mlflow
import pandas as pd

# Evaluation data: questions paired with reference answers
eval_df = pd.DataFrame(
    {
        "inputs": [
            "What is MLflow?",
            "What is Spark?",
        ],
        "ground_truth": [
            "MLflow is an open-source platform for managing the end-to-end machine learning (ML) "
            "lifecycle. It was developed by Databricks, a company that specializes in big data and "
            "machine learning solutions. MLflow is designed to address the challenges that data "
            "scientists and machine learning engineers face when developing, training, and deploying "
            "machine learning models.",
            "Apache Spark is an open-source, distributed computing system designed for big data "
            "processing and analytics. It was developed in response to limitations of the Hadoop "
            "MapReduce computing model, offering improvements in speed and ease of use. Spark "
            "provides libraries for various tasks such as data ingestion, processing, and analysis "
            "through its components like Spark SQL for structured data, Spark Streaming for "
            "real-time data processing, and MLlib for machine learning tasks.",
        ],
    }
)
with mlflow.start_run(run_name="logging_model_as_openai_model", log_system_metrics=True) as run:
    mlflow.doctor()  # print environment and configuration diagnostics
    # Log the model as a pyfunc
    logged_model = mlflow.pyfunc.log_model(
        artifact_path="model",
        python_model=llm_response,
        pip_requirements=["openai"],
        signature=None,
    )
    # Load the model back using the run ID (the run is already bound by the context manager)
    run_id = run.info.run_id
    model = mlflow.pyfunc.load_model(f"runs:/{run_id}/model")
    results = mlflow.evaluate(
        model,
        eval_df,
        targets="ground_truth",  # specify which column corresponds to the expected output
        model_type="question-answering",  # model type indicates which metrics are relevant for this task
        evaluators="default",
    )
print(results.metrics)
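
For reference, the snippet above never defines `llm_response`. Here is a minimal sketch of what such a pyfunc callable might look like, assuming it answers each question in the `inputs` column via the OpenAI chat completions API; the model name and prompt format below are placeholders, not the original code:

import openai

def llm_response(model_input):
    # Hypothetical pyfunc callable: MLflow passes the feature columns of
    # eval_df as a DataFrame, and we return one answer per question.
    client = openai.OpenAI()  # assumes OPENAI_API_KEY is set in the environment
    answers = []
    for question in model_input["inputs"]:
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",  # placeholder model name
            messages=[{"role": "user", "content": question}],
        )
        answers.append(completion.choices[0].message.content)
    return answers

Because each call here blocks on a network request to OpenAI, a missing or misconfigured API key (or a stalled connection) inside this function would make the evaluation step appear to hang exactly as described.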