Проще всего выполнить необходимые преобразования вложенных элементов Row в DataFrame с помощью map,
а затем переименовать столбец с помощью toDF:
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
import org.apache.spark.sql.functions._
import spark.implicits._
/** Nested `additional` struct of the source elements: two string fields. */
case class Additional(
  id: String,
  item_value: String
)

/** One entry of the source `myElements` array. */
case class Element(
  income: String,
  currency: String,
  additional: Additional
)

/** Target shape of the nested struct: the [[Additional]] fields plus `extra2`. */
case class Additional2(
  id: String,
  item_value: String,
  extra2: String
)

/** Target shape of an array entry: same as [[Element]] but carrying an [[Additional2]]. */
case class Element2(
  income: String,
  currency: String,
  additional: Additional2
)
// Sample input: a single array column `myElements`; the first row holds two
// elements, the second row holds one.
val df = {
  val firstRow = Seq(
    Element(income = "70k", currency = "US", additional = Additional(id = "1", item_value = "101")),
    Element(income = "90k", currency = "US", additional = Additional(id = "2", item_value = "202"))
  )
  val secondRow = Seq(
    Element(income = "80k", currency = "US", additional = Additional(id = "3", item_value = "303"))
  )
  Seq(firstRow, secondRow).toDF("myElements")
}
// Rebuilds every entry of the `myElements` array as an Element2, i.e. widens the
// nested `additional` struct with an `extra2` field that is left null.
//
// Fields are read with positional getters guarded by null checks rather than with
// `case Row(income: String, ...)` patterns: a type pattern never matches null, so a
// NULL array, element, string or struct would otherwise fail with scala.MatchError.
// Nulls are passed through unchanged; null (not Option) is used because that is how
// a SQL NULL is represented for these String/struct case-class fields.
val df2 = df.map { row =>
  val converted: Seq[Element2] =
    if (row.isNullAt(0)) null
    else row.getSeq[Row](0).map { element =>
      if (element == null) null
      else {
        // Struct layout by position: 0 = income, 1 = currency, 2 = additional.
        val additional: Additional2 =
          if (element.isNullAt(2)) null
          else {
            val nested = element.getStruct(2)
            // Nested layout by position: 0 = id, 1 = item_value.
            Additional2(nested.getString(0), nested.getString(1), null)
          }
        Element2(element.getString(0), element.getString(1), additional)
      }
    }
  converted
}.toDF("myElements")
// Print the converted rows without truncating the cell contents.
df2.show(truncate = false)
// +--------------------------------------------+
// |myElements                                  |
// +--------------------------------------------+
// |[[70k, US, [1, 101,]], [90k, US, [2, 202,]]]|
// |[[80k, US, [3, 303,]]]                      |
// +--------------------------------------------+

// Print the resulting schema: `additional` now carries the extra `extra2` field.
df2.printSchema()
// root
//  |-- myElements: array (nullable = true)
//  |    |-- element: struct (containsNull = true)
//  |    |    |-- income: string (nullable = true)
//  |    |    |-- currency: string (nullable = true)
//  |    |    |-- additional: struct (nullable = true)
//  |    |    |    |-- id: string (nullable = true)
//  |    |    |    |-- item_value: string (nullable = true)
//  |    |    |    |-- extra2: string (nullable = true)
Если по какой-то причине предпочтительнее использовать UDF, необходимые преобразования по сути те же:
// UDF that rebuilds an array of element structs as Element2 values, i.e. widens the
// nested `additional` struct with an `extra2` field that is left null.
//
// Fields are read with positional getters guarded by null checks rather than with
// `case Row(income: String, ...)` patterns: a type pattern never matches null, so a
// NULL element, string or struct would otherwise fail with scala.MatchError, and a
// null array argument would fail with a NullPointerException on `s.map`.
// Nulls are passed through unchanged; null (not Option) is used because that is how
// a SQL NULL is represented for these String/struct case-class fields.
val myUDF = udf((s: Seq[Row]) => {
  val converted: Seq[Element2] =
    if (s == null) null
    else s.map { element =>
      if (element == null) null
      else {
        // Struct layout by position: 0 = income, 1 = currency, 2 = additional.
        val additional: Additional2 =
          if (element.isNullAt(2)) null
          else {
            val nested = element.getStruct(2)
            // Nested layout by position: 0 = id, 1 = item_value.
            Additional2(nested.getString(0), nested.getString(1), null)
          }
        Element2(element.getString(0), element.getString(1), additional)
      }
    }
  converted
})
// Apply the UDF to the array column and keep the original column name.
val df2 = df.select(
  myUDF(col("myElements")).alias("myElements")
)