Этот код может быть вам полезен:
import org.apache.spark.sql.functions._
// Sample input: four rows of weather flags, each stored as the string
// "True" or "False", one column per flag.
val df = {
  val columnNames = Seq(
    "very_hot", "hot", "cold", "little_snow", "medium_snow",
    "very_cold", "deep_snow", "freezing", "windy"
  )
  val rows = Seq(
    ("True", "False", "False", "False", "False", "False", "False", "False", "True"),
    ("False", "False", "True", "True", "False", "False", "False", "False", "False"),
    ("False", "False", "True", "False", "True", "False", "False", "False", "False"),
    ("False", "False", "False", "False", "False", "True", "True", "False", "False")
  )
  rows.toDF(columnNames: _*)
}
df.show()
/*
+--------+-----+-----+-----------+-----------+---------+---------+--------+-----+
|very_hot|  hot| cold|little_snow|medium_snow|very_cold|deep_snow|freezing|windy|
+--------+-----+-----+-----------+-----------+---------+---------+--------+-----+
|    True|False|False|      False|      False|    False|    False|   False| True|
|   False|False| True|       True|      False|    False|    False|   False|False|
|   False|False| True|      False|       True|    False|    False|   False|False|
|   False|False|False|      False|      False|     True|     True|   False|False|
+--------+-----+-----+-----------+-----------+---------+---------+--------+-----+
*/
// Collapse the weather flag columns into a single comma-separated "features"
// column that lists the names of the flags whose value contains "True", then
// remove the original flag columns.
val df1 = {
  // Flag columns, in the order their names should appear inside "features".
  // Declared once so the condition, the label and the drop list cannot drift apart.
  val flagCols = Seq(
    "very_hot", "hot", "cold", "little_snow", "medium_snow",
    "very_cold", "deep_snow", "freezing", "windy"
  )
  // One expression per flag: the column's own name when the flag is set.
  // `when` without `otherwise` yields null for unset flags, and `concat_ws`
  // skips nulls, so only the names of set flags end up in the joined string.
  val setFlagNames = flagCols.map(name => when(col(name).contains("True"), name))

  df.withColumn("features", concat_ws(",", setFlagNames: _*))
    .drop(flagCols: _*)
}
df1.show()
/*
+-------------------+
|           features|
+-------------------+
|     very_hot,windy|
|   cold,little_snow|
|   cold,medium_snow|
|very_cold,deep_snow|
+-------------------+
*/