<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Error while trying to implement Change Data Capture in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/error-while-trying-to-implement-change-data-capture/m-p/8065#M3789</link>
    <description>&lt;P&gt;Hi All,&lt;/P&gt;&lt;P&gt;I'm new to databricks and learning towards taking up Associate Engineer Certification.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;While going through the section "Build Data Pipelines with Delta Live Tables". &lt;/P&gt;&lt;P&gt;I'm trying to implement Change Data Capture, but it is erroring out when executing the workflow. &lt;/P&gt;&lt;P&gt;'m not sure if my code is incorrect as It is similar to what we have in the course material. Please see details below and kindly let me know how to fix this.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Screenshot of the Notebook used in the definition of the Pipeline.&lt;/P&gt;&lt;P&gt;Scroll down for the Code text and Error Text.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/563i6322E8D5E4F1E565/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;B&gt;&lt;U&gt;Code&lt;/U&gt;&lt;/B&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;CREATE OR REFRESH STREAMING LIVE TABLE SCD2_RAW&lt;/P&gt;&lt;P&gt;AS select current_timestamp() load_time, right(input_file_name(),13) source_file, * from json.`dbfs:/FileStore/tables/J_File_1.json`;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;CREATE OR REFRESH STREAMING LIVE TABLE SCD2_SILVER;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;APPLY CHANGES INTO LIVE.SCD2_SILVER&lt;/P&gt;&lt;P&gt;&amp;nbsp;FROM STREAM(LIVE.SCD2_RAW)&lt;/P&gt;&lt;P&gt;&amp;nbsp;KEYS (userid)&lt;/P&gt;&lt;P&gt;&amp;nbsp;SEQUENCE BY load_time&lt;/P&gt;&lt;P&gt;&amp;nbsp;COLUMNS * EXCEPT (load_time, source_file);&lt;/P&gt;&lt;P&gt;--&amp;nbsp;&amp;nbsp;STORED AS SCD TYPE 1&lt;/P&gt;&lt;P&gt;--&amp;nbsp;&amp;nbsp;TRACK HISTORY ON (userid, name, city);&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;B&gt;&lt;U&gt;Error&lt;/U&gt;&lt;/B&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;org.apache.spark.sql.AnalysisException: 'SCD2_RAW' is a streaming table, but 'SCD2_RAW' was not read as a stream. Either remove the STREAMING keyword after the CREATE clause or read the input as a stream rather than a table.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;</description>
    <pubDate>Thu, 09 Mar 2023 01:41:43 GMT</pubDate>
    <dc:creator>GURUPRASAD</dc:creator>
    <dc:date>2023-03-09T01:41:43Z</dc:date>
    <item>
      <title>Error while trying to implement Change Data Capture</title>
      <link>https://community.databricks.com/t5/data-engineering/error-while-trying-to-implement-change-data-capture/m-p/8065#M3789</link>
      <description>&lt;P&gt;Hi All,&lt;/P&gt;&lt;P&gt;I'm new to databricks and learning towards taking up Associate Engineer Certification.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;While going through the section "Build Data Pipelines with Delta Live Tables". &lt;/P&gt;&lt;P&gt;I'm trying to implement Change Data Capture, but it is erroring out when executing the workflow. &lt;/P&gt;&lt;P&gt;'m not sure if my code is incorrect as It is similar to what we have in the course material. Please see details below and kindly let me know how to fix this.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Screenshot of the Notebook used in the definition of the Pipeline.&lt;/P&gt;&lt;P&gt;Scroll down for the Code text and Error Text.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/563i6322E8D5E4F1E565/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;B&gt;&lt;U&gt;Code&lt;/U&gt;&lt;/B&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;CREATE OR REFRESH STREAMING LIVE TABLE SCD2_RAW&lt;/P&gt;&lt;P&gt;AS select current_timestamp() load_time, right(input_file_name(),13) source_file, * from json.`dbfs:/FileStore/tables/J_File_1.json`;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;CREATE OR REFRESH STREAMING LIVE TABLE SCD2_SILVER;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;APPLY CHANGES INTO LIVE.SCD2_SILVER&lt;/P&gt;&lt;P&gt;&amp;nbsp;FROM STREAM(LIVE.SCD2_RAW)&lt;/P&gt;&lt;P&gt;&amp;nbsp;KEYS (userid)&lt;/P&gt;&lt;P&gt;&amp;nbsp;SEQUENCE BY load_time&lt;/P&gt;&lt;P&gt;&amp;nbsp;COLUMNS * EXCEPT (load_time, source_file);&lt;/P&gt;&lt;P&gt;--&amp;nbsp;&amp;nbsp;STORED AS SCD TYPE 1&lt;/P&gt;&lt;P&gt;--&amp;nbsp;&amp;nbsp;TRACK HISTORY ON (userid, name, city);&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;B&gt;&lt;U&gt;Error&lt;/U&gt;&lt;/B&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;org.apache.spark.sql.AnalysisException: 'SCD2_RAW' is a streaming table, but 'SCD2_RAW' was not read as a stream. Either remove the STREAMING keyword after the CREATE clause or read the input as a stream rather than a table.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;</description>
      <pubDate>Thu, 09 Mar 2023 01:41:43 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-while-trying-to-implement-change-data-capture/m-p/8065#M3789</guid>
      <dc:creator>GURUPRASAD</dc:creator>
      <dc:date>2023-03-09T01:41:43Z</dc:date>
    </item>
    <item>
      <title>Re: Error while trying to implement Change Data Capture</title>
      <link>https://community.databricks.com/t5/data-engineering/error-while-trying-to-implement-change-data-capture/m-p/8066#M3790</link>
      <description>&lt;P&gt;Having had a quick look, I think your error is because you are trying to add SCD to a STREAMING LIVE table.  I believe APPLY CHANGES INTO cannot be used on a streaming table.&lt;/P&gt;&lt;P&gt;You can use a streaming table as a source though.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Simply changing this line:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;CREATE OR REFRESH STREAMING LIVE TABLE SCD2_SILVER;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;to:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;CREATE OR REFRESH LIVE TABLE SCD2_SILVER;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;should be sufficient.&lt;/P&gt;&lt;P&gt;Do make sure you are running a compatible version of databricks.  Also, if you want to use Track History, you need to set the pipeline cluster config:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;pipelines.enableTrackHistory&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;to true.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I've also found the databricks SQL parser to give syntax errors sometimes and a little experimenting with removing line breaks, etc. can sometimes help track down errors.&lt;/P&gt;</description>
      <pubDate>Thu, 09 Mar 2023 15:11:29 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-while-trying-to-implement-change-data-capture/m-p/8066#M3790</guid>
      <dc:creator>Kearon</dc:creator>
      <dc:date>2023-03-09T15:11:29Z</dc:date>
    </item>
    <item>
      <title>Re: Error while trying to implement Change Data Capture</title>
      <link>https://community.databricks.com/t5/data-engineering/error-while-trying-to-implement-change-data-capture/m-p/8067#M3791</link>
      <description>&lt;P&gt;Thank you Kearon for taking time to answer.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I tried implementing the change suggested but seeing a different error now.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;B&gt;Error :&lt;/B&gt;&lt;/P&gt;&lt;P&gt;org.apache.spark.sql.AnalysisException: Unsupported SQL statement for table 'SCD2_SILVER': Missing query is not supported.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;B&gt;Modified Code:&lt;/B&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;CREATE OR REFRESH STREAMING LIVE TABLE SCD2_RAW&lt;/P&gt;&lt;P&gt;AS select current_timestamp() load_time, right(input_file_name(),13) source_file, * from json.`dbfs:/FileStore/tables/J_File_1.json`;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;CREATE OR REFRESH LIVE TABLE SCD2_SILVER;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;APPLY CHANGES INTO LIVE.SCD2_SILVER&lt;/P&gt;&lt;P&gt;&amp;nbsp;FROM STREAM(LIVE.SCD2_RAW)&lt;/P&gt;&lt;P&gt;&amp;nbsp;KEYS (userid)&lt;/P&gt;&lt;P&gt;&amp;nbsp;SEQUENCE BY load_time&lt;/P&gt;&lt;P&gt;&amp;nbsp;COLUMNS * EXCEPT (load_time, source_file);&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/565i52DDD18368F7CE61/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;Thanks&lt;/P&gt;</description>
      <pubDate>Fri, 10 Mar 2023 01:01:02 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-while-trying-to-implement-change-data-capture/m-p/8067#M3791</guid>
      <dc:creator>GURUPRASAD</dc:creator>
      <dc:date>2023-03-10T01:01:02Z</dc:date>
    </item>
    <item>
      <title>Re: Error while trying to implement Change Data Capture</title>
      <link>https://community.databricks.com/t5/data-engineering/error-while-trying-to-implement-change-data-capture/m-p/8068#M3792</link>
      <description>&lt;P&gt;@GURUPRASAD MADAPURA VENKATESHAIAH​, I've had that error before. Trying to remember the cause....&lt;/P&gt;&lt;P&gt;I'll try to remember. In the mean time, here is a notebook I have that is very similar and works for me. If you try building yours up, step by step, using this as a template, hopefully that will do it. Obviously, you don't need the json explosion parts or the where clause. That's just to handle some messiness in the data I am processing.&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;CREATE OR REFRESH STREAMING LIVE TABLE currStudents_ingest
AS SELECT
  col.*
  ,file_modification_time
FROM  (
SELECT fi.file_modification_time, EXPLODE_OUTER (fi.students)
FROM STREAM(LIVE.currStudents_streamFiles) AS fi 
)
WHERE col.id IS NOT NULL
;
&amp;nbsp;
CREATE OR REFRESH STREAMING LIVE TABLE currStudents_SCD;
&amp;nbsp;
APPLY CHANGES INTO
  live.currStudents_SCD
FROM
  stream(live.currStudents_ingest)
KEYS
  (id)
SEQUENCE BY
  file_modification_time
STORED AS
  SCD TYPE 2
TRACK HISTORY ON * EXCEPT (file_modification_time)
;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 10 Mar 2023 09:17:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/error-while-trying-to-implement-change-data-capture/m-p/8068#M3792</guid>
      <dc:creator>Kearon</dc:creator>
      <dc:date>2023-03-10T09:17:05Z</dc:date>
    </item>
  </channel>
</rss>

