Идея @Kris верна: сначала развернуть массив (explode), а затем удалить исходный столбец (drop). Я нашёл пример здесь.
Я переименовал один из атрибутов result, потому что в схеме есть ещё один атрибут с тем же именем result, — чтобы избежать путаницы при развёртывании (explode):
Шаг 1: (вход)
|-- timestamp: long (nullable = true)
|-- hopDetails: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- hop: long (nullable = true)
| | |-- result: array (nullable = true)
| | | |-- element: struct (containsNull = true)
| | | | |-- from: string (nullable = true)
| | | | |-- rtt: double (nullable = true)
| | | | |-- size: long (nullable = true)
| | | | |-- ttl: long (nullable = true)
Шаг 2:
Код:
// Flatten the outer array: one output row per element of hopDetails.
// The array is exploded exactly once so that each `hop` stays paired with
// its own `result`. Exploding "hopDetails.hop" and "hopDetails.result" in two
// separate withColumn calls would multiply the rows (N x N per input row)
// and pair every hop with every result.
val hopRows = renamed_newDF
  // Replace the array column with a single struct element per row.
  .withColumn("hopDetails", explode(renamed_newDF("hopDetails")))
var exploded_1 = hopRows
  // Promote the struct fields to top-level columns.
  .withColumn("hop", hopRows("hopDetails.hop"))
  .withColumn("result", hopRows("hopDetails.result"))
  .drop("hopDetails")
exploded_1.printSchema
Схема вывода:
|-- timestamp: long (nullable = true)
|-- hop: long (nullable = true)
|-- result: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- from: string (nullable = true)
| | |-- rtt: double (nullable = true)
| | |-- size: long (nullable = true)
| | |-- ttl: long (nullable = true)
Шаг 3:
Код:
// Flatten the inner array: one output row per element of `result`.
// The array is exploded exactly once so that from/rtt/size/ttl of the same
// element stay on the same row. Exploding each field in its own withColumn
// call would produce M^4 rows per input row with mismatched field values.
val resultRows = exploded_1
  // Replace the array column with a single struct element per row.
  .withColumn("result", explode(exploded_1("result")))
var exploded_2 = resultRows
  // Promote the struct fields to top-level columns.
  .withColumn("from", resultRows("result.from"))
  .withColumn("rtt", resultRows("result.rtt"))
  .withColumn("size", resultRows("result.size"))
  .withColumn("ttl", resultRows("result.ttl"))
  .drop("result")
exploded_2.printSchema
Схема:
root
|-- af: long (nullable = true)
|-- dst_addr: string (nullable = true)
|-- from: string (nullable = true)
|-- msm_id: long (nullable = true)
|-- prb_id: long (nullable = true)
|-- src_addr: string (nullable = true)
|-- timestamp: long (nullable = true)
|-- hop: long (nullable = true)
|-- rtt: double (nullable = true)
|-- size: long (nullable = true)
|-- ttl: long (nullable = true)