jrod123
New Contributor II

Creating a view first and then a table, as you suggested, still produces the same result: data in the table is overwritten (rather than appended) with each run of the pipeline. Here's a simple code example that I used:

from pyspark.sql import SparkSession
from pyspark.sql.functions import lit
import datetime
import dlt

# Initialize Spark session: getOrCreate() returns the active session when one
# already exists, otherwise it builds a new one named "Data Ingestion".
# NOTE(review): inside a DLT pipeline a `spark` session is presumably already
# provided by the runtime, making this line redundant there -- confirm.
spark = SparkSession.builder.appName("Data Ingestion").getOrCreate()

from pyspark.sql.functions import current_timestamp

# Function to generate sample data
def generate_data():
    """Build a three-row sample DataFrame stamped with the current time.

    Returns:
        A Spark DataFrame with columns ``id``, ``value`` and ``timestamp``.
        The timestamp is taken once on the driver, when this function is
        called, and attached to every row as a literal.
    """
    data = [
        (1, "A"),
        (2, "B"),
        (3, "C"),
    ]
    df = spark.createDataFrame(data, ["id", "value"])
    # lit() freezes the driver-side clock value into the plan, so all rows
    # produced by a single call share exactly the same timestamp.
    df = df.withColumn("timestamp", lit(datetime.datetime.now()))
    return df

# Define DLT view and table

@dlt.view(
    name="example_view"
)
def create_example_view():
    """Expose the generated sample data as the pipeline view ``example_view``."""
    return generate_data()

# Define the Delta Live Table
@dlt.table(
    name="example_table"
)
def create_example_table():
    """Materialise ``example_table`` from the rows of ``example_view``.

    NOTE(review): this is a batch read, and per the Databricks documentation a
    table defined from a batch query is fully recomputed on each pipeline
    update -- which matches the "overwritten rather than appended" behaviour.
    Appending presumably requires a streaming source; confirm against the docs.
    """
    # Return the DataFrame read from the view; previously it was read and then
    # discarded in favour of a fresh generate_data() call.
    df = spark.read.table("example_view")
    return df