Unable to call logged ML model from a different notebook when using Spark ML

DK
New Contributor II

Hi, I am an R user experimenting with building an ML model in R using Spark-flavoured algorithms (sparklyr) in Databricks. However, I am struggling to call a model that was logged as part of an experiment from a different notebook when I use Spark-flavoured algorithms. This intro may not make sense on its own, so please bear with me; I explain the issue below with code. I used Databricks Community Edition for this example, and I get the same result with my office's licensed Databricks service.

Platform: Databricks community edition

Cluster specs: 11.2 ML (includes Apache Spark 3.3.0, Scala 2.12) 

Language: R

In Notebook 1:

# Notebook 1: fit a k-means model on the built-in iris data (so the example
# can be reproduced), log it with MLflow, then load it back and score with it.

# Setup ----
install.packages("mlflow")
library(mlflow)
install_mlflow()
install.packages("carrier")
library(sparklyr)

sc <- sparklyr::spark_connect(method = "databricks")

# Data ----
# Copy the local iris data frame into Spark as the table "iris".
iris_tbl <- sparklyr::sdf_copy_to(sc, iris, "iris", overwrite = TRUE)

# Train and log ----
# Everything inside the braces runs within a freshly started MLflow run.
with(mlflow_start_run(), {
  kmeans_model <- sparklyr::ml_kmeans(
    iris_tbl,
    k = 3,
    features = c("Petal_Length", "Petal_Width")
  )

  # Wrap prediction in a crate; `!!` inlines the fitted model object into it.
  # NOTE(review): the inlined object comes from this notebook's Spark
  # connection; presumably it is not usable from another session -- confirm.
  predicted <- carrier::crate(~ sparklyr::ml_predict(!!kmeans_model, .x))

  mlflow_log_model(predicted, "model")
})

# Load back and score ----
# Fetch the logged model from the run's artifacts by its run URI.
logged_model <- "runs:/996bc4f0ad2a4681a4acf42515ee73d5/model"
loaded_model <- mlflow_load_model(logged_model)

# Apply the loaded model to the Spark table.
loaded_model(iris_tbl)
 
# predicts perfectly
(2) Spark Jobs
# Source: spark<?> [?? x 7]
  Sepal_Length Sepal_Width Petal_Length Petal_Width Species features predict…¹
     <dbl>    <dbl>    <dbl>    <dbl> <chr>  <list>    <int>
 1     5.1     3.5     1.4     0.2 setosa <dbl [2]>     1
 2     4.9     3      1.4     0.2 setosa <dbl [2]>     1
 3     4.7     3.2     1.3     0.2 setosa <dbl [2]>     1
 4     4.6     3.1     1.5     0.2 setosa <dbl [2]>     1
 5     5      3.6     1.4     0.2 setosa <dbl [2]>     1
 6     5.4     3.9     1.7     0.4 setosa <dbl [2]>     1
 7     4.6     3.4     1.4     0.3 setosa <dbl [2]>     1
 8     5      3.4     1.5     0.2 setosa <dbl [2]>     1
 9     4.4     2.9     1.4     0.2 setosa <dbl [2]>     1
10     4.9     3.1     1.5     0.1 setosa <dbl [2]>     1
# … with more rows, and abbreviated variable name ¹​prediction
# ℹ Use `print(n = ...)` to see more rows

However, in Notebook 2, when I try to predict with the same loaded model, it throws an error that I can't make any sense of:

# Notebook 2: load the model logged under the run ID below and score with it.

# Setup ----
install.packages("mlflow")
library(mlflow)
install_mlflow()

install.packages("carrier")
library(sparklyr)

sc <- sparklyr::spark_connect(method = "databricks")

# Copy the local iris data frame into Spark as the table "iris".
iris_tbl <- sparklyr::sdf_copy_to(sc, iris, "iris", overwrite = TRUE)

# Load the logged model ----
logged_model <- "runs:/996bc4f0ad2a4681a4acf42515ee73d5/model"
loaded_model <- mlflow_load_model(logged_model)

# Score ----
# This is the call that fails in this notebook (see the error output below).
loaded_model(iris_tbl)
 
# However, the following error pops up:
 
 
Error : java.lang.IllegalArgumentException: Object not found 171
	at sparklyr.StreamHandler.handleMethodCall(stream.scala:115)
	at sparklyr.StreamHandler.read(stream.scala:62)
	at sparklyr.BackendHandler.$anonfun$channelRead0$1(handler.scala:60)
	at scala.util.control.Breaks.breakable(Breaks.scala:42)
	at sparklyr.BackendHandler.channelRead0(handler.scala:41)
	at sparklyr.BackendHandler.channelRead0(handler.scala:14)
	at io.netty.channel.SimpleChannelInboundHandler.channelRead(SimpleChannelInboundHandler.java:99)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)
	at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)
	at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:103)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)
	at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)
	at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:327)
	at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:299)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)
	at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:357)
	at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1410)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:379)
	at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:365)
	at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:919)
	at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:166)
	at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:722)
	at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:658)
	at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:584)
	at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:496)
	at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:986)
	at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
	at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
	at java.lang.Thread.run(Thread.java:748)
 
Error: java.lang.IllegalArgumentException: Object not found 171

Could you please help me resolve this issue? Thanks.