<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic unity catalog with external table and column masking in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/unity-catalog-with-external-table-and-column-masking/m-p/75352#M34944</link>
    <description>&lt;P&gt;Hi everbody,&amp;nbsp;&lt;/P&gt;&lt;P&gt;I am facing a issue with spark structured steaming.&amp;nbsp;&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&lt;SPAN&gt;here is a sample of my code:&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;df = spark.readStream.load(f"{bronze_table_path}") df.writeStream \ .format("delta") \ .option("checkpointLocation", f"{silver_checkpoint}") \ .option("mergeSchema", "true") \ .trigger(availableNow=True) \ .outputMode("append") \ .start(path=f"{silver_table_path}")&amp;lt;div&amp;gt; The code above work pretty well&amp;lt;div&amp;gt;&amp;lt;p&amp;gt;But if i add column masking to silver table, and rerun the notebook i get the following error &amp;lt;li-code lang="markup"&amp;gt;Exception: Exception in quality process: [RequestId=xxxx-exxx-xxxx-adf4-86b9b7e82252 ErrorClass=INVALID_PARAMETER_VALUE.INVALID_PARAMETER_VALUE] Input path gs://table overlaps with other external tables or volumes. 
Conflicting tables/volumes: xxx.xxx.table, xxx.xxx.another_table JVM stacktrace: com.databricks.sql.managedcatalog.UnityCatalogServiceException at com.databricks.managedcatalog.TypeConversionUtils$.toUnityCatalogDeniedException(TypeConversionUtils.scala:2224) at com.databricks.managedcatalog.TypeConversionUtils$.toCatalyst(TypeConversionUtils.scala:2181) at com.databricks.managedcatalog.ManagedCatalogClientImpl.$anonfun$checkPathAccess$1(ManagedCatalogClientImpl.scala:4088) at com.databricks.managedcatalog.ManagedCatalogClientImpl.$anonfun$recordAndWrapException$2(ManagedCatalogClientImpl.scala:4555) at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94) at com.databricks.managedcatalog.ManagedCatalogClientImpl.$anonfun$recordAndWrapException$1(ManagedCatalogClientImpl.scala:4554) at com.databricks.managedcatalog.ErrorDetailsHandler.wrapServiceException(ErrorDetailsHandler.scala:26) at com.databricks.managedcatalog.ErrorDetailsHandler.wrapServiceException$(ErrorDetailsHandler.scala:24) at com.databricks.managedcatalog.ManagedCatalogClientImpl.wrapServiceException(ManagedCatalogClientImpl.scala:158) at com.databricks.managedcatalog.ManagedCatalogClientImpl.recordAndWrapException(ManagedCatalogClientImpl.scala:4551) at com.databricks.managedcatalog.ManagedCatalogClientImpl.checkPathAccess(ManagedCatalogClientImpl.scala:4064) at com.databricks.sql.managedcatalog.ManagedCatalogCommon.checkPathAccess(ManagedCatalogCommon.scala:1974) at com.databricks.sql.managedcatalog.ProfiledManagedCatalog.$anonfun$checkPathAccess$1(ProfiledManagedCatalog.scala:633) at org.apache.spark.sql.catalyst.MetricKeyUtils$.measure(MetricKey.scala:714) at com.databricks.sql.managedcatalog.ProfiledManagedCatalog.$anonfun$profile$1(ProfiledManagedCatalog.scala:62) at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94) at com.databricks.sql.managedcatalog.ProfiledManagedCatalog.profile(ProfiledManagedCatalog.scala:61) at 
com.databricks.sql.managedcatalog.ProfiledManagedCatalog.checkPathAccess(ProfiledManagedCatalog.scala:633) at com.databricks.unity.CredentialScopeSQLHelper$.registerShortestParentPath(CredentialScopeSQLHelper.scala:299) at com.databricks.unity.CredentialScopeSQLHelper$.register(CredentialScopeSQLHelper.scala:195) at com.databricks.unity.CredentialScopeSQLHelper$.registerPathAccess(CredentialScopeSQLHelper.scala:638) at org.apache.spark.sql.streaming.DataStreamUtils$.$anonfun$registerSinkPathInUC$1(DataStreamUtils.scala:267) at org.apache.spark.sql.streaming.DataStreamUtils$.$anonfun$registerSinkPathInUC$1$adapted(DataStreamUtils.scala:266) at scala.Option.foreach(Option.scala:407) at org.apache.spark.sql.streaming.DataStreamUtils$.registerSinkPathInUC(DataStreamUtils.scala:266) at org.apache.spark.sql.streaming.DataStreamWriter.startInternal(DataStreamWriter.scala:478) at org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:256) at org.apache.spark.sql.connect.planner.SparkConnectPlanner.handleWriteStreamOperationStart(SparkConnectPlanner.scala:3218) at org.apache.spark.sql.connect.planner.SparkConnectPlanner.process(SparkConnectPlanner.scala:2697) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.handleCommand(ExecuteThreadRunner.scala:285) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.$anonfun$executeInternal$1(ExecuteThreadRunner.scala:229) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.$anonfun$executeInternal$1$adapted(ExecuteThreadRunner.scala:167) at org.apache.spark.sql.connect.service.SessionHolder.$anonfun$withSession$2(SessionHolder.scala:332) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:1175) at org.apache.spark.sql.connect.service.SessionHolder.$anonfun$withSession$1(SessionHolder.scala:332) at org.apache.spark.JobArtifactSet$.withActiveJobArtifactState(JobArtifactSet.scala:97) at 
org.apache.spark.sql.artifact.ArtifactManager.$anonfun$withResources$1(ArtifactManager.scala:84) at org.apache.spark.util.Utils$.withContextClassLoader(Utils.scala:234) at org.apache.spark.sql.artifact.ArtifactManager.withResources(ArtifactManager.scala:83) at org.apache.spark.sql.connect.service.SessionHolder.withSession(SessionHolder.scala:331) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.executeInternal(ExecuteThreadRunner.scala:167) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.org$apache$spark$sql$connect$execution$ExecuteThreadRunner$$execute(ExecuteThreadRunner.scala:118) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner$ExecutionThread.$anonfun$run$1(ExecuteThreadRunner.scala:349) at com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:45) at com.databricks.unity.HandleImpl.runWith(UCSHandle.scala:103) at com.databricks.unity.HandleImpl.$anonfun$runWithAndClose$1(UCSHandle.scala:108) at scala.util.Using$.resource(Using.scala:269) at com.databricks.unity.HandleImpl.runWithAndClose(UCSHandle.scala:107) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner$ExecutionThread.run(ExecuteThreadRunner.scala:348)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;all table are external table and schema are managed schema&lt;/P&gt;&lt;P&gt;It is a known limitation of column masking ?&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks you&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Fri, 21 Jun 2024 13:45:59 GMT</pubDate>
    <dc:creator>christian_chong</dc:creator>
    <dc:date>2024-06-21T13:45:59Z</dc:date>
    <item>
      <title>unity catalog with external table and column masking</title>
      <link>https://community.databricks.com/t5/data-engineering/unity-catalog-with-external-table-and-column-masking/m-p/75352#M34944</link>
      <description>&lt;P&gt;Hi everbody,&amp;nbsp;&lt;/P&gt;&lt;P&gt;I am facing a issue with spark structured steaming.&amp;nbsp;&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&lt;SPAN&gt;here is a sample of my code:&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;df = spark.readStream.load(f"{bronze_table_path}") df.writeStream \ .format("delta") \ .option("checkpointLocation", f"{silver_checkpoint}") \ .option("mergeSchema", "true") \ .trigger(availableNow=True) \ .outputMode("append") \ .start(path=f"{silver_table_path}")&amp;lt;div&amp;gt; The code above work pretty well&amp;lt;div&amp;gt;&amp;lt;p&amp;gt;But if i add column masking to silver table, and rerun the notebook i get the following error &amp;lt;li-code lang="markup"&amp;gt;Exception: Exception in quality process: [RequestId=xxxx-exxx-xxxx-adf4-86b9b7e82252 ErrorClass=INVALID_PARAMETER_VALUE.INVALID_PARAMETER_VALUE] Input path gs://table overlaps with other external tables or volumes. 
Conflicting tables/volumes: xxx.xxx.table, xxx.xxx.another_table JVM stacktrace: com.databricks.sql.managedcatalog.UnityCatalogServiceException at com.databricks.managedcatalog.TypeConversionUtils$.toUnityCatalogDeniedException(TypeConversionUtils.scala:2224) at com.databricks.managedcatalog.TypeConversionUtils$.toCatalyst(TypeConversionUtils.scala:2181) at com.databricks.managedcatalog.ManagedCatalogClientImpl.$anonfun$checkPathAccess$1(ManagedCatalogClientImpl.scala:4088) at com.databricks.managedcatalog.ManagedCatalogClientImpl.$anonfun$recordAndWrapException$2(ManagedCatalogClientImpl.scala:4555) at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94) at com.databricks.managedcatalog.ManagedCatalogClientImpl.$anonfun$recordAndWrapException$1(ManagedCatalogClientImpl.scala:4554) at com.databricks.managedcatalog.ErrorDetailsHandler.wrapServiceException(ErrorDetailsHandler.scala:26) at com.databricks.managedcatalog.ErrorDetailsHandler.wrapServiceException$(ErrorDetailsHandler.scala:24) at com.databricks.managedcatalog.ManagedCatalogClientImpl.wrapServiceException(ManagedCatalogClientImpl.scala:158) at com.databricks.managedcatalog.ManagedCatalogClientImpl.recordAndWrapException(ManagedCatalogClientImpl.scala:4551) at com.databricks.managedcatalog.ManagedCatalogClientImpl.checkPathAccess(ManagedCatalogClientImpl.scala:4064) at com.databricks.sql.managedcatalog.ManagedCatalogCommon.checkPathAccess(ManagedCatalogCommon.scala:1974) at com.databricks.sql.managedcatalog.ProfiledManagedCatalog.$anonfun$checkPathAccess$1(ProfiledManagedCatalog.scala:633) at org.apache.spark.sql.catalyst.MetricKeyUtils$.measure(MetricKey.scala:714) at com.databricks.sql.managedcatalog.ProfiledManagedCatalog.$anonfun$profile$1(ProfiledManagedCatalog.scala:62) at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94) at com.databricks.sql.managedcatalog.ProfiledManagedCatalog.profile(ProfiledManagedCatalog.scala:61) at 
com.databricks.sql.managedcatalog.ProfiledManagedCatalog.checkPathAccess(ProfiledManagedCatalog.scala:633) at com.databricks.unity.CredentialScopeSQLHelper$.registerShortestParentPath(CredentialScopeSQLHelper.scala:299) at com.databricks.unity.CredentialScopeSQLHelper$.register(CredentialScopeSQLHelper.scala:195) at com.databricks.unity.CredentialScopeSQLHelper$.registerPathAccess(CredentialScopeSQLHelper.scala:638) at org.apache.spark.sql.streaming.DataStreamUtils$.$anonfun$registerSinkPathInUC$1(DataStreamUtils.scala:267) at org.apache.spark.sql.streaming.DataStreamUtils$.$anonfun$registerSinkPathInUC$1$adapted(DataStreamUtils.scala:266) at scala.Option.foreach(Option.scala:407) at org.apache.spark.sql.streaming.DataStreamUtils$.registerSinkPathInUC(DataStreamUtils.scala:266) at org.apache.spark.sql.streaming.DataStreamWriter.startInternal(DataStreamWriter.scala:478) at org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:256) at org.apache.spark.sql.connect.planner.SparkConnectPlanner.handleWriteStreamOperationStart(SparkConnectPlanner.scala:3218) at org.apache.spark.sql.connect.planner.SparkConnectPlanner.process(SparkConnectPlanner.scala:2697) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.handleCommand(ExecuteThreadRunner.scala:285) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.$anonfun$executeInternal$1(ExecuteThreadRunner.scala:229) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.$anonfun$executeInternal$1$adapted(ExecuteThreadRunner.scala:167) at org.apache.spark.sql.connect.service.SessionHolder.$anonfun$withSession$2(SessionHolder.scala:332) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:1175) at org.apache.spark.sql.connect.service.SessionHolder.$anonfun$withSession$1(SessionHolder.scala:332) at org.apache.spark.JobArtifactSet$.withActiveJobArtifactState(JobArtifactSet.scala:97) at 
org.apache.spark.sql.artifact.ArtifactManager.$anonfun$withResources$1(ArtifactManager.scala:84) at org.apache.spark.util.Utils$.withContextClassLoader(Utils.scala:234) at org.apache.spark.sql.artifact.ArtifactManager.withResources(ArtifactManager.scala:83) at org.apache.spark.sql.connect.service.SessionHolder.withSession(SessionHolder.scala:331) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.executeInternal(ExecuteThreadRunner.scala:167) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.org$apache$spark$sql$connect$execution$ExecuteThreadRunner$$execute(ExecuteThreadRunner.scala:118) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner$ExecutionThread.$anonfun$run$1(ExecuteThreadRunner.scala:349) at com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:45) at com.databricks.unity.HandleImpl.runWith(UCSHandle.scala:103) at com.databricks.unity.HandleImpl.$anonfun$runWithAndClose$1(UCSHandle.scala:108) at scala.util.Using$.resource(Using.scala:269) at com.databricks.unity.HandleImpl.runWithAndClose(UCSHandle.scala:107) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner$ExecutionThread.run(ExecuteThreadRunner.scala:348)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;all table are external table and schema are managed schema&lt;/P&gt;&lt;P&gt;It is a known limitation of column masking ?&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks you&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 21 Jun 2024 13:45:59 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unity-catalog-with-external-table-and-column-masking/m-p/75352#M34944</guid>
      <dc:creator>christian_chong</dc:creator>
      <dc:date>2024-06-21T13:45:59Z</dc:date>
    </item>
    <item>
      <title>Re: unity catalog with external table and column masking</title>
      <link>https://community.databricks.com/t5/data-engineering/unity-catalog-with-external-table-and-column-masking/m-p/75355#M34946</link>
      <description>&lt;P&gt;My first message was not well formatted.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;i wrote :&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;df = spark.readStream.load(f"{bronze_table_path}") 
df.writeStream \ 
.format("delta") \ 
.option("checkpointLocation", f"{silver_checkpoint}") \ 
.option("mergeSchema", "true") \ 
.trigger(availableNow=True) \ 
.outputMode("append") \ 
.start(path=f"{silver_table_path}")&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;But if I add column masking to silver table, and rerun the notebook I get the following error&amp;nbsp; ...&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 21 Jun 2024 14:08:50 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unity-catalog-with-external-table-and-column-masking/m-p/75355#M34946</guid>
      <dc:creator>christian_chong</dc:creator>
      <dc:date>2024-06-21T14:08:50Z</dc:date>
    </item>
  </channel>
</rss>

