To ensure that the "value" field retains its integer type, cast it explicitly after parsing the JSON. Note that after flattening the parsed struct, "value" still lives inside the nested "data" struct, so the cast must reference the nested field.
from pyspark.sql.functions import col, from_json, expr
from pyspark.sql.types import StructType, StructField, IntegerType, ArrayType, LongType
# Target schema for the `sales` table: a customer id, the customer's order
# numbers, and a nested `data` struct whose `value` field is an integer.
sales_schema = StructType(
    [
        StructField("customer_id", IntegerType(), True),
        StructField("order_numbers", ArrayType(LongType()), True),
        StructField(
            "data",
            StructType([StructField("value", IntegerType())]),
        ),
    ]
)
@dlt.table(
    schema=sales_schema
)
def sales():
    """Streaming DLT table that parses the JSON `data` column.

    Reads the stream from `table_name`, parses the JSON string held in the
    `data` column against `sales_schema`, flattens the parsed struct, and
    explicitly casts the nested `data.value` field to IntegerType so the
    field retains its integer type in the published table.

    Returns:
        A streaming DataFrame with columns `customer_id`, `order_numbers`,
        and `data` (struct with integer `value`), matching `sales_schema`.
    """
    parsed = (
        spark.readStream.table("table_name")
        # Parse the raw JSON string into a struct matching sales_schema.
        .withColumn("parsed_data", from_json(col("data"), sales_schema))
        # Flatten: produces customer_id, order_numbers, data (a struct).
        .select("parsed_data.*")
    )
    # BUG FIX: the original code cast a top-level `col("value")`, but after
    # the flatten above no such column exists — `value` is nested inside the
    # `data` struct, so the plan would fail with an AnalysisException.
    # Cast the nested field in place instead (Column.withField, Spark 3.1+).
    return parsed.withColumn(
        "data",
        col("data").withField("value", col("data.value").cast(IntegerType())),
    )