Проверьте код ниже.
scala> df.show(false)
+-------+-------+------------------------------------------+-------+
|column1|column2|column3 |column4|
+-------+-------+------------------------------------------+-------+
|a |b |[[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 0]]|c |
|x |y |[[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 0]]|z |
+-------+-------+------------------------------------------+-------+
UDF
scala> // Joins the second-to-last element of every inner array with ",".
scala> // Inner arrays that are null or have fewer than two elements are skipped (the original .init.last threw on them); a null column yields "".
scala> val mkString = udf((data: Seq[Seq[Int]]) => Option(data).getOrElse(Seq.empty[Seq[Int]]).collect { case row if row != null && row.length >= 2 => row(row.length - 2) }.mkString(","))
Результат
scala> df.withColumn("agg_values",mkString($"column3")).show(false)
+-------+-------+------------------------------------------+-------+----------+
|column1|column2|column3 |column4|agg_values|
+-------+-------+------------------------------------------+-------+----------+
|a |b |[[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 0]]|c |3,6,9 |
|x |y |[[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 0]]|z |3,6,9 |
+-------+-------+------------------------------------------+-------+----------+
Без UDF — Spark 2.4+
scala>
// Built-in higher-order functions only: slice(x,-2,1) takes one element starting at the
// second position from the end of each inner array, flatten merges the one-element
// slices into a single array, and concat_ws joins that array with ",".
df.withColumn("agg_values", expr("concat_ws(',',flatten(transform(column3, x -> slice(x,-2,1))))")).show(false)
+-------+-------+------------------------------------------+-------+----------+
|column1|column2|column3 |column4|agg_values|
+-------+-------+------------------------------------------+-------+----------+
|a |b |[[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 0]]|c |3,6,9 |
|x |y |[[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 0]]|z |3,6,9 |
+-------+-------+------------------------------------------+-------+----------+
scala>
// Same result without flatten: element_at(x,-2) picks the second-to-last element of
// each inner array directly, and concat_ws joins the picked values with ",".
df.withColumn("agg_values", expr("concat_ws(',',transform(column3, x -> element_at(x,-2)))")).show(false)
+-------+-------+------------------------------------------+-------+----------+
|column1|column2|column3 |column4|agg_values|
+-------+-------+------------------------------------------+-------+----------+
|a |b |[[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 0]]|c |3,6,9 |
|x |y |[[1, 2, 3, 4], [4, 5, 6, 7], [7, 8, 9, 0]]|z |3,6,9 |
+-------+-------+------------------------------------------+-------+----------+