Hi Team,
I am getting the following error (mlflow.utils.databricks_utils._NoDbutilsError) when I try to register models in another Databricks workspace from inside applyInPandas.
I have already set up the secret scope and prefix to authenticate against the second Azure Databricks workspace, where the models need to be registered, and I am passing the remote registry URI in:
mlflow.set_registry_uri('databricks://prod-mlflowreg:prodwss')
If I register the model without using applyInPandas, it works; with applyInPandas, it fails.
Is it that MLflow doesn't work when executed on a worker node, inside a UDF? It looks like MLflow is not able to obtain the host credentials for the second workspace (see get_databricks_host_creds in the traceback below).
Could you please look into this? Thanks.
def register_models(row):
    """Register the model logged under one MLflow run in the remote registry.

    Intended to be applied per ``run_id`` group via ``groupBy("run_id")
    .applyInPandas(...)``, so ``row`` is the pandas DataFrame for one group.
    The registered model is named after the run id itself.

    NOTE: when this runs inside ``applyInPandas``, the reported traceback
    shows ``mlflow.register_model`` failing in ``get_databricks_host_creds``
    with ``_NoDbutilsError`` while resolving the ``databricks://`` registry
    URI; the same call succeeds outside ``applyInPandas``.

    Args:
        row: pandas DataFrame with a ``run_id`` column. Its index is reset
            in place.

    Returns:
        An empty pandas DataFrame with the single column ``'null'``, matching
        the result schema passed to ``applyInPandas``.
    """
    # Normalise the index so that label 0 addresses the first record.
    row.reset_index(drop=True, inplace=True)
    run_id = row['run_id'][0]

    # Point the registry client at the second workspace (secret scope:prefix).
    mlflow.set_registry_uri('databricks://prod-mlflowreg:prodwss')

    model_uri = "runs:/" + run_id + "/model"
    mlflow.register_model(model_uri, run_id)

    # Nothing useful to return; emit a frame that satisfies the schema.
    return pd.DataFrame(columns=['null'])
Define the schema of the resulting DataFrame:
# Schema of the frame returned by register_models: one string column, 'null'.
# The name must be `result_schema` because the applyInPandas call refers to it.
result_schema = StructType([StructField('null', StringType())])
Register the models in the MLflow registry:
# Call register_models once per run_id group. This only builds the plan;
# the UDF executes when an action such as df_reg.count() is triggered.
# Parentheses replace the backslash continuation, and the function is passed
# directly instead of being wrapped in a lambda.
df_reg = (
    sdf.groupBy("run_id")
    .applyInPandas(register_models, result_schema)
)
Error:
--------------------------------------------------------------------------- PythonException Traceback (most recent call last) <command-1616661595521875> in <module> 1 # Triggering action ----> 2 df_reg.count()
/databricks/spark/python/pyspark/sql/dataframe.py in count(self) 584 2 585 """ --> 586 return int(self._jdf.count()) 587 588 @ignore_unicode_prefix
/databricks/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py in call(self, *args) 1303 answer = self.gateway_client.send_command(command) 1304 return_value = get_return_value( -> 1305 answer, self.gateway_client, self.target_id, self.name) 1306 1307 for temp_arg in temp_args:
/databricks/spark/python/pyspark/sql/utils.py in deco(a, *kw) 131 # Hide where the exception came from that shows a non-Pythonic 132 # JVM exception message. --> 133 raise_from(converted) 134 else: 135 raise
/databricks/spark/python/pyspark/sql/utils.py in raise_from(e)
PythonException: An exception was thrown from a UDF: 'mlflow.utils.databricks_utils._NoDbutilsError', from <command-1616661595521870>, line 18. Full traceback below: Traceback (most recent call last): File "/databricks/spark/python/pyspark/worker.py", line 654, in main process() File "/databricks/spark/python/pyspark/worker.py", line 646, in process serializer.dump_stream(out_iter, outfile) File "/databricks/spark/python/pyspark/sql/pandas/serializers.py", line 281, in dump_stream timely_flush_timeout_ms=self.timely_flush_timeout_ms) File "/databricks/spark/python/pyspark/sql/pandas/serializers.py", line 97, in dump_stream for batch in iterator: File "/databricks/spark/python/pyspark/sql/pandas/serializers.py", line 271, in init_stream_yield_batches for series in iterator: File "/databricks/spark/python/pyspark/worker.py", line 446, in mapper return f(keys, vals) File "/databricks/spark/python/pyspark/worker.py", line 176, in <lambda> return lambda k, v: [(wrapped(k, v), to_arrow_type(return_type))] File "/databricks/spark/python/pyspark/worker.py", line 161, in wrapped result = f(pd.concat(value_series, axis=1)) File "/databricks/spark/python/pyspark/util.py", line 109, in wrapper return f(args, *kwargs) File "<command-1616661595521874>", line 8, in <lambda> File "<command-1616661595521870>", line 18, in register_models File "/databricks/python/lib/python3.7/site-packages/mlflow/tracking/_model_registry/fluent.py", line 27, in register_model create_model_response = client.create_registered_model(name) File "/databricks/python/lib/python3.7/site-packages/mlflow/tracking/client.py", line 395, in create_registered_model return self._get_registry_client().create_registered_model(name, tags, description) File "/databricks/python/lib/python3.7/site-packages/mlflow/tracking/_model_registry/client.py", line 47, in create_registered_model return self.store.create_registered_model(name, tags, description) File 
"/databricks/python/lib/python3.7/site-packages/mlflow/store/model_registry/rest_store.py", line 74, in create_registered_model response_proto = self._call_endpoint(CreateRegisteredModel, req_body) File "/databricks/python/lib/python3.7/site-packages/mlflow/store/model_registry/rest_store.py", line 55, in _call_endpoint return call_endpoint(self.get_host_creds(), endpoint, method, json_body, response_proto) File "/databricks/python/lib/python3.7/site-packages/mlflow/tracking/_model_registry/utils.py", line 94, in <lambda> return RestStore(lambda: get_databricks_host_creds(store_uri)) File "/databricks/python/lib/python3.7/site-packages/mlflow/utils/databricks_utils.py", line 216, in get_databricks_host_creds dbutils = _get_dbutils() File "/databricks/python/lib/python3.7/site-packages/mlflow/utils/databricks_utils.py", line 21, in _get_dbutils raise _NoDbutilsError mlflow.utils.databricks_utils._NoDbutilsError