Hi @abueno .
I'm assuming you're asking how to do this in pyspark. You can use when and otherwise conditional functions to achieve your expected result:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType
# Schema
schema = StructType([
StructField("column1", StringType(), True),
StructField("column2", StringType(), True),
StructField("column3", StringType(), True),
])
# Sample data
data = [
("2024", "DE", "original1"),
("2024", "SC", "original2"),
("2024", "FR", "original3"),
("2023", "DE", "original4"),
("2023", "SC", "original5"),
("2023", "IT", "original6"),
("2022", "DE", "original7"),
("2025", "NL", "original8"),
]
# Create DataFrame
df = spark.createDataFrame(data, schema)
# Show initial DataFrame
df.show(truncate=False)
from pyspark.sql.functions import when, col
df = df.withColumn(
"result_column",
when((col("column1") == "2024") & (col("column2").isin("DE", "SC")), "value1")
.when((col("column1") == "2024"), "value2")
.when((col("column1") == "2023") & (col("column2").isin("DE", "SC")), "value3")
.when((col("column1") == "2023"), "value4")
.otherwise(col("column3"))
)