Options
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
10-24-2023 08:52 AM
@SRK Please see a copy of this answer on Stack Overflow here.
You can use DLT Expectations to perform this check (see my previous answer if you're using SQL rather than Python):
@dlt.table(name="table1")
def create_df():
    """Build the sample DataFrame registered as the DLT table ``table1``.

    The frame has three nullable columns (``id``, ``name``, ``age``) and
    three hard-coded rows. Two of the rows share ``id`` 1, so the data
    contains a duplicate key.

    Returns:
        The DataFrame produced by ``spark.createDataFrame``.
    """
    # NOTE(review): `T` is presumably `pyspark.sql.types` and `spark` the
    # active session; neither is defined in this snippet - confirm.
    columns = [
        T.StructField("id", T.IntegerType(), True),
        T.StructField("name", T.StringType(), True),
        T.StructField("age", T.IntegerType(), True),
    ]
    rows = [
        (1, "Alice", 25),
        (1, "Bob", 30),  # same id as the row above
        (3, "Charlie", 40),
    ]
    return spark.createDataFrame(rows, T.StructType(columns))
@dlt.table(name="table2")
@dlt.expect("unique_pk", "num_entries = 1")
def create_df():
    """Count rows per ``id`` in ``table1`` and register the result as ``table2``.

    The output has one row per distinct ``id`` with the row count in
    ``num_entries``. The ``unique_pk`` expectation checks the constraint
    ``num_entries = 1`` against each output row.

    Returns:
        The grouped DataFrame with the count column renamed to
        ``num_entries``.
    """
    # NOTE(review): how a violation is handled (recorded, dropped or failed)
    # depends on which `dlt.expect*` decorator is used - confirm that plain
    # `expect` gives the desired behaviour.
    return (
        dlt.read("table1")
        .groupBy("id")
        .count()
        .withColumnRenamed("count", "num_entries")
    )