You can use the concat_ws/concat/array_join functions to combine the values of all columns into a single string.
In PySpark:
df.show()
#+--------+-----------+---+
#|    Name|    Address|Age|
#+--------+-----------+---+
#|fullname|fulladdress|dob|
#+--------+-----------+---+
from pyspark.sql.functions import concat_ws, array_join, array
cols = df.columns  # list of all column names
df.select(concat_ws(',', *cols).alias("str")).show(10, False)
# or
df.withColumn("str", array_join(array(*cols), ",")).drop(*cols).show(10, False)
#+------------------------+
#|str                     |
#+------------------------+
#|fullname,fulladdress,dob|
#+------------------------+
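concat (also mentioned above) handles NULLs differently from concat_ws: concat returns NULL if any input column is NULL, while concat_ws simply skips NULL values. A minimal sketch, assuming an active SparkSession named spark and a hypothetical frame df2 with a NULL Address:
from pyspark.sql.functions import concat, concat_ws
df2 = spark.createDataFrame(
    [("fullname", None, "dob")],
    "Name string, Address string, Age string")
df2.select(
    concat_ws(",", *df2.columns).alias("ws"),  # -> fullname,dob (NULL skipped)
    concat(*df2.columns).alias("plain")        # -> NULL (one NULL nullifies concat)
).show(truncate=False)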
In Scala:
import org.apache.spark.sql.functions._
val cols = df.columns.map(col)  // Array[Column] with every column
df.select(concat_ws(",", cols: _*).alias("str")).show(false)
// or
df.withColumn("str", concat_ws(",", cols: _*)).drop(df.columns: _*).show(false)
df.withColumn("str", array_join(array(cols: _*), ",")).drop(df.columns: _*).show(10, false)
//+------------------------+
//|str                     |
//+------------------------+
//|fullname,fulladdress,dob|
//+------------------------+
To get the column names (the keys) as a comma-separated string:
from pyspark.sql.functions import lit
keys = ",".join(df.columns)  # "Name,Address,Age"
df.withColumn("keys", lit(keys)).show()