<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: [delta live tabel] exception: getPrimaryKeys not implemented for debezium in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/delta-live-tabel-exception-getprimarykeys-not-implemented-for/m-p/67037#M33265</link>
    <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/9"&gt;@Retired_mod&lt;/a&gt;&amp;nbsp;Thanks for the reply. But it does not help me much. Do you have more specific advice to help me resolve my problem?&lt;/P&gt;</description>
    <pubDate>Tue, 23 Apr 2024 08:28:27 GMT</pubDate>
    <dc:creator>smedegaard</dc:creator>
    <dc:date>2024-04-23T08:28:27Z</dc:date>
    <item>
      <title>[delta live tabel] exception: getPrimaryKeys not implemented for debezium</title>
      <link>https://community.databricks.com/t5/data-engineering/delta-live-tabel-exception-getprimarykeys-not-implemented-for/m-p/66938#M33239</link>
      <description>&lt;P&gt;&lt;SPAN&gt;I've defined a streaming Delta Live Table&amp;nbsp;in a notebook using Python.&lt;/SPAN&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;SPAN&gt;running on "&lt;STRONG&gt;preview&lt;/STRONG&gt;" channel&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN&gt;delta cache accelerated (Standard_D4ads_v5) compute&lt;/SPAN&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;&lt;FONT face="courier new,courier"&gt;&lt;SPAN&gt;&lt;FONT face="arial,helvetica,sans-serif"&gt;&lt;STRONG&gt;It fails with&lt;/STRONG&gt;&lt;/FONT&gt;&lt;BR /&gt;org.apache.spark.sql.streaming.StreamingQueryException: [STREAM_FAILED] Query [id = xxx, runId = yyy] terminated with exception: getPrimaryKeys not implemented for debezium SQLSTATE: XXKST&lt;BR /&gt;&lt;BR /&gt;&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;SPAN&gt;running on "&lt;STRONG&gt;current&lt;/STRONG&gt;" channel&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN&gt;delta cache accelerated (Standard_D4ads_v5) compute&lt;/SPAN&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;&lt;FONT face="courier new,courier"&gt;&lt;SPAN&gt;&lt;FONT face="arial,helvetica,sans-serif"&gt;&lt;STRONG&gt;It fails with&lt;/STRONG&gt;&lt;/FONT&gt;&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT face="courier new,courier"&gt;&lt;SPAN&gt;scala.ScalaReflectionException: object com.databricks.cdc.spark.DebeziumJDBCMicroBatchProvider not found.&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/P&gt;&lt;H2&gt;&lt;SPAN&gt;The notebook&lt;/SPAN&gt;&lt;/H2&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;import dlt
import pyspark.sql.functions as F

@dlt.table(
  name="sliver_hour_values",
  comment="...",
  table_properties={"quality": "silver"},
  partition_cols=["event_year", "event_month", "event_day_of_month"],
)
@dlt.expect_or_drop("valid_date", F.col("event_datetime").isNotNull())
@dlt.expect_or_drop("valid_report_index", F.col("report_index").isNotNull())
@dlt.expect_or_drop("valid_event_datetime", F.col("event_datetime").isNotNull())
@dlt.expect_or_drop("valid_event_value", F.col("event_value").isNotNull())
def get_hour_values():
  return (
    spark
    .readStream
    .table("mycatalog.myschema.hourvalues")
      .withColumnRenamed('ReportIx', 'report_index')
      .withColumnRenamed('DateTime', 'event_datetime')
      .withColumnRenamed('Value', 'event_value')
      .withColumnRenamed('Quality', 'quality')
    .select(
        "report_index",
        "event_datetime",
        "event_value",
        "quality"
    )
    .withColumn("ingestion_datetime", F.current_timestamp())
    .withColumn("event_year", F.year(F.col("event_datetime")))
    .withColumn("event_month", F.month(F.col("event_datetime")))
    .withColumn("event_day_of_month", F.dayofmonth(F.col("event_datetime")))
  )&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 22 Apr 2024 13:54:39 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/delta-live-tabel-exception-getprimarykeys-not-implemented-for/m-p/66938#M33239</guid>
      <dc:creator>smedegaard</dc:creator>
      <dc:date>2024-04-22T13:54:39Z</dc:date>
    </item>
    <item>
      <title>Re: [delta live tabel] exception: getPrimaryKeys not implemented for debezium</title>
      <link>https://community.databricks.com/t5/data-engineering/delta-live-tabel-exception-getprimarykeys-not-implemented-for/m-p/67037#M33265</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/9"&gt;@Retired_mod&lt;/a&gt;&amp;nbsp;Thanks for the reply. But it does not help me much. Do you have more specific advice to help me resolve my problem?&lt;/P&gt;</description>
      <pubDate>Tue, 23 Apr 2024 08:28:27 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/delta-live-tabel-exception-getprimarykeys-not-implemented-for/m-p/67037#M33265</guid>
      <dc:creator>smedegaard</dc:creator>
      <dc:date>2024-04-23T08:28:27Z</dc:date>
    </item>
  </channel>
</rss>

