We have multiple processing chains that use R notebooks with sparklyr, and we are trying to migrate them from runtime 10.4 to 12.2. Unfortunately, there seems to be an incompatibility between sparklyr and runtimes > 10.4.
Steps to reproduce:
1) Create a notebook "test" with the following code:
%r
library(sparklyr)
sc <- spark_connect(method = "databricks")
sdf_sql(sc, "SELECT * FROM samples.nyctaxi.trips LIMIT 100")
2) Clone the notebook as "test2"
3) Execute the notebook "test" on a 12.2 cluster: it works as expected
4) Execute the notebook "test2" on the SAME cluster: you get the following error:
Error : org.apache.spark.SparkException: Missing Credential Scope.
at com.databricks.unity.UCSDriver$Manager.$anonfun$scope$1(UCSDriver.scala:104)
at scala.Option.getOrElse(Option.scala:189)
at com.databricks.unity.UCSDriver$Manager.scope(UCSDriver.scala:104)
at com.databricks.unity.UCSDriver$Manager.currentScope(UCSDriver.scala:98)
at com.databricks.unity.UnityCredentialScope$.currentScope(UnityCredentialScope.scala:100)
at com.databricks.unity.UnityCredentialScope$.getSAMRegistry(UnityCredentialScope.scala:120)
at com.databricks.unity.SAMRegistry$.getSAMOpt(SAMRegistry.scala:358)
at com.databricks.unity.CredentialScopeSQLHelper$.registerPathForDeltaLog(CredentialScopeSQLHelper.scala:254)
at com.databricks.sql.transaction.tahoe.DeltaLog$.apply(DeltaLog.scala:931)
at com.databricks.sql.transaction.tahoe.DeltaLog$.apply(DeltaLog.scala:864)
at com.databricks.sql.transaction.tahoe.DeltaLog$.apply(DeltaLog.scala:844)
at com.databricks.sql.transaction.tahoe.DeltaLog$.forTable(DeltaLog.scala:791)
at com.databricks.sql.transaction.tahoe.DeltaLog$.$anonfun$forTableWithSnapshot$1(DeltaLog.scala:870)
at com.databricks.sql.transaction.tahoe.DeltaLog$.withFreshSnapshot(DeltaLog.scala:903)
at com.databricks.sql.transaction.tahoe.DeltaLog$.forTableWithSnapshot(DeltaLog.scala:870)
at com.databricks.sql.managedcatalog.SampleTable.readSchema(SampleTables.scala:109)
at com.databricks.sql.managedcatalog.ManagedCatalogSessionCatalog.$anonfun$getSampleTableMetadata$1(ManagedCatalogSessionCatalog.scala:954)
at scala.Option.map(Option.scala:230)
at com.databricks.sql.managedcatalog.ManagedCatalogSessionCatalog.getSampleTableMetadata(ManagedCatalogSessionCatalog.scala:949)
at com.databricks.sql.managedcatalog.ManagedCatalogSessionCatalog.$anonfun$fastGetTablesByName$6(ManagedCatalogSessionCatalog.scala:1057)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
at scala.collection.immutable.List.foreach(List.scala:431)
at scala.collection.generic.TraversableForwarder.foreach(TraversableForwarder.scala:38)
at scala.collection.generic.TraversableForwarder.foreach$(TraversableForwarder.scala:38)
at scala.collection.mutable.ListBuffer.foreach(ListBuffer.scala:47)
at scala.collection.TraversableLike.map(TraversableLike.scala:286)
at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
at scala.collection.AbstractTraversable.map(Traversable.scala:108)
at com.databricks.sql.managedcatalog.ManagedCatalogSessionCatalog.fastGetTablesByName(ManagedCatalogSessionCatalog.scala:1057)
at com.databricks.sql.transaction.tahoe.catalog.DeltaCatalog.fetchFromCatalog(DeltaCatalog.scala:498)
at com.databricks.sql.transaction.tahoe.catalog.DeltaCatalog.$anonfun$loadTables$1(DeltaCatalog.scala:439)
at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:80)
at com.databricks.sql.transaction.tahoe.metering.DeltaLogging.recordFrameProfile(DeltaLogging.scala:265)
at com.databricks.sql.transaction.tahoe.metering.DeltaLogging.recordFrameProfile$(DeltaLogging.scala:263)
at com.databricks.sql.transaction.tahoe.catalog.DeltaCatalog.recordFrameProfile(DeltaCatalog.scala:86)
at com.databricks.sql.transaction.tahoe.catalog.DeltaCatalog.loadTables(DeltaCatalog.scala:436)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anon$3.$anonfun$submit$1(Analyzer.scala:1870)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:80)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.org$apache$spark$sql$catalyst$analysis$Analyzer$ResolveRelations$$record(Analyzer.scala:1929)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anon$3.submit(Analyzer.scala:1852)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:1472)
at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:1412)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$4(RuleExecutor.scala:229)
at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:80)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$3(RuleExecutor.scala:229)
at scala.collection.LinearSeqOptimized.foldLeft(LinearSeqOptimized.scala:126)
at scala.collection.LinearSeqOptimized.foldLeft$(LinearSeqOptimized.scala:122)
at scala.collection.immutable.List.foldLeft(List.scala:91)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$2(RuleExecutor.scala:226)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:80)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeBatch$1(RuleExecutor.scala:218)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$8(RuleExecutor.scala:296)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$8$adapted(RuleExecutor.scala:296)
at scala.collection.immutable.List.foreach(List.scala:431)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$execute$1(RuleExecutor.scala:296)
at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:80)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:197)
at org.apache.spark.sql.catalyst.analysis.Analyzer.executeSameContext(Analyzer.scala:361)
at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$execute$1(Analyzer.scala:354)
at org.apache.spark.sql.catalyst.analysis.AnalysisContext$.withNewAnalysisContext(Analyzer.scala:261)
at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:354)
at org.apache.spark.sql.catalyst.analysis.Analyzer.execute(Analyzer.scala:282)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.$anonfun$executeAndTrack$1(RuleExecutor.scala:189)
at org.apache.spark.sql.catalyst.QueryPlanningTracker$.withTracker(QueryPlanningTracker.scala:153)
at org.apache.spark.sql.catalyst.rules.RuleExecutor.executeAndTrack(RuleExecutor.scala:189)
at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:334)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:379)
at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:333)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:153)
at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:80)
at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:319)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$3(QueryExecution.scala:372)
at org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:808)
at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:372)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:1020)
at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:369)
at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:147)
at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:147)
at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:137)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:111)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:1020)
at org.apache.spark.sql.SparkSession.$anonfun$withActiveAndFrameProfiler$1(SparkSession.scala:1027)
at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:80)
at org.apache.spark.sql.SparkSession.withActiveAndFrameProfiler(SparkSession.scala:1027)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:109)
at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:830)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:1020)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:822)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:856)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.refl
Error: org.apache.spark.SparkException: Missing Credential Scope.
This problem does not occur on a 10.4 runtime.
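For completeness, the exact Spark version behind each runtime can be checked from the notebook itself; a minimal sketch, assuming an open sparklyr connection as in step 1:
%r
library(sparklyr)
sc <- spark_connect(method = "databricks")
# Spark version of the attached cluster: 3.2.1 on DBR 10.4 LTS, 3.3.2 on DBR 12.2 LTS
spark_version(sc)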
To note:
- The cluster is Unrestricted / single-user access mode
- Unity Catalog is enabled on our workspace, but I don't think it is the cause, as other people apparently hit similar problems: stackoverflow link (a way to check this is sketched below)
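To rule Unity Catalog in or out, the clone-and-run sequence from the steps above could be pointed at a legacy-metastore table instead; a minimal sketch, where hive_metastore.default.some_table is a placeholder for any non-UC table in the workspace:
%r
library(sparklyr)
sc <- spark_connect(method = "databricks")
# If the failure were UC-specific, this variant should succeed in the cloned notebook
sdf_sql(sc, "SELECT * FROM hive_metastore.default.some_table LIMIT 100")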
Also, I'm not sure, but I think the same problem may occur when using %run to run other notebooks.
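If it is the same bug, a minimal repro would be (notebook names are hypothetical): a notebook "child" containing the sparklyr code from step 1, and a notebook "parent" whose only cell is:
%run ./child
Running "parent" on a 12.2 cluster would then be expected to fail with the same Missing Credential Scope error.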
Right now this blocks our migration to newer runtimes, and it could quickly become a problem if we can't access new functionality.