You can use concat_ws to concatenate the values of multiple columns into a single column, joined by a separator. For example:
from pyspark.sql import SparkSession
from pyspark.sql.functions import concat_ws
from pyspark.sql.types import StructType, StructField, StringType

spark = SparkSession.builder.getOrCreate()

data = [("A1", "B1", "C1", "D1"),
        ("A2", "B2", "C2", "D2"),
        ("A3", "B3", "C3", "D3"),
        ("A4", "B4", "C3", "D4")]

schema = StructType([
    StructField("A", StringType(), True),
    StructField("B", StringType(), True),
    StructField("C", StringType(), True),
    StructField("D", StringType(), True)
])

df = spark.createDataFrame(data=data, schema=schema)
df.printSchema()
df.show()

# Join A, B, and C with "_" into one column, keeping D as-is.
df.select(concat_ws("_", df.A, df.B, df.C).alias("ABC"), "D").show()
Output:
root
|-- A: string (nullable = true)
|-- B: string (nullable = true)
|-- C: string (nullable = true)
|-- D: string (nullable = true)
+---+---+---+---+
| A| B| C| D|
+---+---+---+---+
| A1| B1| C1| D1|
| A2| B2| C2| D2|
| A3| B3| C3| D3|
| A4| B4| C3| D4|
+---+---+---+---+
+--------+---+
| ABC| D|
+--------+---+
|A1_B1_C1| D1|
|A2_B2_C2| D2|
|A3_B3_C3| D3|
|A4_B4_C3| D4|
+--------+---+
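Note that concat_ws skips null values rather than nulling out the whole result (unlike concat, which returns null if any input is null). A minimal sketch, reusing the same spark session; the df2 name and sample row are just for illustration:

from pyspark.sql.functions import concat_ws

# B is null here, so concat_ws drops it and joins the rest,
# producing "A5_C5" instead of null.
df2 = spark.createDataFrame([("A5", None, "C5")], "A string, B string, C string")
df2.select(concat_ws("_", "A", "B", "C").alias("ABC")).show()
# +-----+
# |  ABC|
# +-----+
# |A5_C5|
# +-----+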