Hello Everyone,
I am trying to load a SparkNLP model (link for more details about the model, if required) from the MLflow Model Registry.
To this end, I followed a tutorial and implemented the code below:
import mlflow.pyfunc
class LangDetectionModel(mlflow.pyfunc.PythonModel):
def __init__(self):
super().__init__()
from sparknlp.pretrained import PretrainedPipeline
from sparknlp.pretrained import PipelineModel
# embed the sparknlp model
self._model = PipelineModel.load("/mnt/sparknlp_models/detect_language_375/")
def predict(self, eval_data_lang_detect):
# Apply the transform function for lang detetction
list_columns = eval_data_lang_detect.columns
model_output =self._model.transform(eval_data_lang_detect).select(list_columns+ [F.col("language.result").getItem(0)]).withColumnRenamed('language.result[0]','sparknlp_column')
return model_output
model_path = "my-langdetect-model"
reg_model_name = "NlpieLangDetection"
sparknlp_model = LangDetectionModel()
# Log MLflow entities and save the model
mlflow.set_tracking_uri("sqlite:///mlruns.db")
# Save the conda environment for this model.
conda_env = {
'channels': ['defaults', 'conda-forge'],
'dependencies': [
'python={}'.format(PYTHON_VERSION),
'pip'],
'pip': [
'mlflow',
'cloudpickle=={}'.format(cloudpickle.__version__),
'NlpieLangDetection==0.0.1'
],
'name': 'mlflow-env'
}
# Save the model
mlflow.set_experiment('/Users/Youssef.Meguebli@sanofi.com/Language_Detection_Translation/LangDetectionTest')
with mlflow.start_run(run_name="Nlpie Language Detection") as run:
model_path = f"{model_path}-{run.info.run_uuid}"
mlflow.log_param("algorithm", "SparNLPLangDetection")
mlflow.pyfunc.save_model(path=model_path, python_model=sparknlp_model, conda_env=conda_env)
I am getting an error on the last line of code, where I am trying to save the model to the MLflow registry.
Below is the error I am getting:
TypeError: cannot pickle '_thread.RLock' object
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<command-2121909764500367> in <module>
4 model_path = f"{model_path}-{run.info.run_uuid}"
5 mlflow.log_param("algorithm", "SparNLPLangDetection")
----> 6 mlflow.pyfunc.save_model(path=model_path, python_model=sparknlp_model, conda_env=conda_env)
/databricks/python/lib/python3.8/site-packages/mlflow/pyfunc/__init__.py in save_model(path, loader_module, data_path, code_path, conda_env, mlflow_model, python_model, artifacts, signature, input_example, pip_requirements, extra_pip_requirements, **kwargs)
1467 )
1468 elif second_argument_set_specified:
-> 1469 return mlflow.pyfunc.model._save_model_with_class_artifacts_params(
1470 path=path,
1471 python_model=python_model,
/databricks/python/lib/python3.8/site-packages/mlflow/pyfunc/model.py in _save_model_with_class_artifacts_params(path, python_model, artifacts, conda_env, code_paths, mlflow_model, pip_requirements, extra_pip_requirements)
162 saved_python_model_subpath = "python_model.pkl"
163 with open(os.path.join(path, saved_python_model_subpath), "wb") as out:
--> 164 cloudpickle.dump(python_model, out)
165 custom_model_config_kwargs[CONFIG_KEY_PYTHON_MODEL] = saved_python_model_subpath
166 else:
/databricks/python/lib/python3.8/site-packages/cloudpickle/cloudpickle_fast.py in dump(obj, file, protocol, buffer_callback)
53 compatibility with older versions of Python.
54 """
---> 55 CloudPickler(
56 file, protocol=protocol, buffer_callback=buffer_callback
57 ).dump(obj)
/databricks/python/lib/python3.8/site-packages/cloudpickle/cloudpickle_fast.py in dump(self, obj)
631 def dump(self, obj):
632 try:
--> 633 return Pickler.dump(self, obj)
634 except RuntimeError as e:
635 if "recursion" in e.args[0]:
TypeError: cannot pickle '_thread.RLock' object
Please let me know if you need any further details.
Many Thanks in advance for your support.