You can combine the columns into an array:
import org.apache.spark.sql.functions._
import spark.implicits._  // assumes an active SparkSession named `spark`

val df = Seq((null, "A"), ("B", null), ("C", "D"), (null, null)).toDF("colA", "colB")

val cols = array(df.columns.map(c =>
  // If the column is not null, prefix its value with the column name; otherwise null
  when(col(c).isNotNull, concat_ws(":", lit(c), col(c)))
): _*)
and use a UserDefinedFunction to drop the nulls (a when with no otherwise yields null) and join the remaining entries:
val combine = udf((xs: Seq[String]) => {
  // Drop the nulls produced by `when` and join the rest
  val tmp = xs.filter(_ != null).mkString(",")
  s"{$tmp}"
})
df.withColumn("col3", combine(cols)).show
// +----+----+---------------+
// |colA|colB| col3|
// +----+----+---------------+
// |null| A| {colB:A}|
// | B|null| {colA:B}|
// | C| D|{colA:C,colB:D}|
// |null|null| {}|
// +----+----+---------------+
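
If you'd rather avoid the UDF, a minimal sketch of the same result with built-in functions only, relying on concat_ws skipping null elements when given an array column:

// UDF-free variant: concat_ws drops the nulls inside `cols` for us
df.withColumn("col3", concat(lit("{"), concat_ws(",", cols), lit("}"))).show
// Produces the same output as above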