Проблема в том, что нужно отдельно обработать случай ArrayType и привести тип его элементов к StructType.
Для этого можно воспользоваться приведением типов Scala во время выполнения.
Сначала я воспроизвёл ваш сценарий следующим образом (кстати, было бы очень полезно включить это в вопрос, поскольку так проблему гораздо легче воспроизвести):
/** Innermost record of the sample data: eight scalar fields (strings, longs and booleans). */
case class DimapraUnit(
  code: String,
  constrained: Boolean,
  id: Long,
  label: String,
  ranking: Long,
  _type: String,
  version: Long,
  visible: Boolean
)

/** Wraps a single [[DimapraUnit]]. */
case class AvailabilityEngagement(
  dimapraUnit: DimapraUnit
)

/** One entry of the `engagementItems` array; wraps an [[AvailabilityEngagement]]. */
case class Element(
  availabilityEngagement: AvailabilityEngagement
)

/** Holds the array of [[Element]] values. */
case class Engagement(
  engagementItems: Array[Element]
)

/** Top-level record of the sample data; wraps an [[Engagement]]. */
case class root(
  engagement: Engagement
)
/**
 * Derives the Spark SQL schema of the `root` case class via reflection,
 * prints it as a tree to standard output, and returns it.
 *
 * The return type is written fully qualified because the imports below are
 * local to the method body and therefore not visible in the signature.
 *
 * @return the schema of `root` as a `StructType`
 * @throws IllegalStateException if reflection yields a non-struct data type
 */
def getSchema(): org.apache.spark.sql.types.StructType = {
  import org.apache.spark.sql.catalyst.ScalaReflection
  import org.apache.spark.sql.types.StructType

  // Pattern match instead of an unchecked cast so that an unexpected type
  // fails with a descriptive message rather than a bare ClassCastException.
  ScalaReflection.schemaFor[root].dataType match {
    case schema: StructType =>
      schema.printTreeString()
      schema
    case other =>
      throw new IllegalStateException(s"Expected a StructType for root, got: $other")
  }
}
Это выведет:
root
|-- engagement: struct (nullable = true)
| |-- engagementItems: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- availabilityEngagement: struct (nullable = true)
| | | | |-- dimapraUnit: struct (nullable = true)
| | | | | |-- code: string (nullable = true)
| | | | | |-- constrained: boolean (nullable = false)
| | | | | |-- id: long (nullable = false)
| | | | | |-- label: string (nullable = true)
| | | | | |-- ranking: long (nullable = false)
| | | | | |-- _type: string (nullable = true)
| | | | | |-- version: long (nullable = false)
| | | | | |-- visible: boolean (nullable = false)
Затем я изменил вашу функцию, добавив дополнительную ветку для ArrayType, в которой тип элементов массива приводится к StructType с помощью asInstanceOf:
import org.apache.spark.sql.types._
/**
 * Flattens a (possibly nested) schema into one column per leaf field.
 *
 * Each returned column is addressed by its dot-separated path from the top of
 * `schema` and is aliased to that same path. Struct fields are descended into
 * recursively, and so are arrays whose element type is a struct. Every other
 * field, including arrays of non-struct elements, is emitted as a single
 * leaf column.
 *
 * @param schema the struct whose fields are flattened
 * @param prefix dot-separated path of the enclosing field, or `null` at the top level
 * @return one aliased column per leaf field, in schema order
 */
def flattenSchema(schema: StructType, prefix: String = null): Array[Column] =
  schema.fields.flatMap { field =>
    // Option(...) turns the null default into None, so top-level fields keep their bare name.
    val colName = Option(prefix).fold(field.name)(parent => s"$parent.${field.name}")

    field.dataType match {
      case nested: StructType =>
        flattenSchema(nested, colName)
      // Only arrays of structs have fields to descend into. Matching on the
      // element type avoids a ClassCastException for arrays of other types.
      case ArrayType(element: StructType, _) =>
        flattenSchema(element, colName)
      case _ =>
        Array(new Column(colName).alias(colName))
    }
  }
И, наконец, результаты:
// Build the sample schema (getSchema also prints it as a tree).
val s = getSchema()
// Flatten it into one aliased column per leaf field.
val res = flattenSchema(s)
// Print each resulting column on its own line.
for (column <- res) println(column)
Вывод:
engagement.engagementItems.availabilityEngagement.dimapraUnit.code AS `engagement.engagementItems.availabilityEngagement.dimapraUnit.code`
engagement.engagementItems.availabilityEngagement.dimapraUnit.constrained AS `engagement.engagementItems.availabilityEngagement.dimapraUnit.constrained`
engagement.engagementItems.availabilityEngagement.dimapraUnit.id AS `engagement.engagementItems.availabilityEngagement.dimapraUnit.id`
engagement.engagementItems.availabilityEngagement.dimapraUnit.label AS `engagement.engagementItems.availabilityEngagement.dimapraUnit.label`
engagement.engagementItems.availabilityEngagement.dimapraUnit.ranking AS `engagement.engagementItems.availabilityEngagement.dimapraUnit.ranking`
engagement.engagementItems.availabilityEngagement.dimapraUnit._type AS `engagement.engagementItems.availabilityEngagement.dimapraUnit._type`
engagement.engagementItems.availabilityEngagement.dimapraUnit.version AS `engagement.engagementItems.availabilityEngagement.dimapraUnit.version`
engagement.engagementItems.availabilityEngagement.dimapraUnit.visible AS `engagement.engagementItems.availabilityEngagement.dimapraUnit.visible`