Hi,
This is my first databricks project. I am loading data from a UC external volume in ADLS into tables and then split one of the tables into two tables based on a column. When I create a pipeline, the tables don't have any dependencies and this is causing issues when re-running the jobs during development. What do I have wrong here? I was under the impression that DLT automatically senses the dependencies and no config is needed.
Thanks!
Here is my SQL code - it is all in one cell:
/*Alarm Data (this is not used downstream)*/
CREATE OR REFRESH STREAMING TABLE RNI_Alarms(
CONSTRAINT timestamp_is_not_null EXPECT (TimeStamp IS NOT NULL),
CONSTRAINT meterid_is_not_null EXPECT (MeterID IS NOT NULL),
CONSTRAINT commodity_is_not_null EXPECT (Commodity IS NOT NULL)
)
AS
SELECT *, _metadata.file_path as file_path
FROM STREAM read_files(
'/Volumes/ami/landing/rni/Alarm/', format => 'CSV', schema => 'RecordType string, RecordVersion string, SenderID string, SenderCustomerID string,ReceiverID string,ReceiverCustomerID string,TimeStamp string,MeterID string,Purpose string,Commodity string,Units string,CalculationConstant int,Interval int,Count int,Data1 string,Data2 string,Data3 string');
/*Interval Data - Step 1 - Get Raw Data*/
CREATE OR REFRESH STREAMING TABLE RNI_Interval(
CONSTRAINT timestamp_is_not_null EXPECT (TimeStamp IS NOT NULL),
CONSTRAINT meterid_is_not_null EXPECT (MeterID IS NOT NULL),
CONSTRAINT commodity_is_not_null EXPECT (Commodity IS NOT NULL)
)
AS
SELECT *, _metadata.file_path as file_path
FROM STREAM read_files(
'/Volumes/ami/landing/rni/Interval/', format => 'CSV', header => 'false', schema => 'RecordType string, RecordVersion string, SenderID string,SenderCustomerID string,ReceiverID string,ReceiverCustomerID string,TimeStamp string,MeterID string,Purpose string,Commodity string,Units string,CalculationConstant int,Interval int,Count int,_c14 string,_c15 string,_c16 string,_c17 string,_c18 string,_c19 string,_c20 string,_c21 string,_c22 string,_c23 string,_c24 string,_c25 string,_c26 string,_c27 string,_c28 string,_c29 string,_c30 string,_c31 string,_c32 string,_c33 string,_c34 string,_c35 string,_c36 string,_c37 string,_c38 string,_c39 string,_c40 string,_c41 string,_c42 string,_c43 string,_c44 string,_c45 string,_c46 string,_c47 string,_c48 string,_c49 string,_c50 string,_c51 string,_c52 string,_c53 string,_c54 string,_c55 string,_c56 string,_c57 string,_c58 string,_c59 string,_c60 string,_c61 string,_c62 string,_c63 string,_c64 string,_c65 string,_c66 string,_c67 string,_c68 string,_c69 string,_c70 string,_c71 string,_c72 string,_c73 string,_c74 string,_c75 string,_c76 string,_c77 string,_c78 string,_c79 string,_c80 string,_c81 string,_c82 string,_c83 string,_c84 string,_c85 string
');
/* Interval data step 2 of 3 - Split to two tables*/
CREATE OR REFRESH STREAMING TABLE RNI_Interval_Register(
CONSTRAINT timestamp_is_not_null EXPECT (TimeStamp IS NOT NULL),
CONSTRAINT meterid_is_not_null EXPECT (MeterID IS NOT NULL),
CONSTRAINT commodity_is_not_null EXPECT (Commodity IS NOT NULL)
)
AS
SELECT
*
FROM STREAM(ami.bronze.RNI_Interval) where Units like '%REG';
CREATE OR REFRESH STREAMING TABLE RNI_Interval_Reads(
CONSTRAINT timestamp_is_not_null EXPECT (TimeStamp IS NOT NULL),
CONSTRAINT meterid_is_not_null EXPECT (MeterID IS NOT NULL),
CONSTRAINT commodity_is_not_null EXPECT (Commodity IS NOT NULL)
)
AS
SELECT
*
FROM STREAM(ami.bronze.RNI_Interval) where Units not like '%REG';