Скажем, у вас есть JSON со следующей структурой:
{
"array": [
{
"a": "asdf",
"b": 1234,
"c": "a",
"d": "str",
"e": 1234
},
{
"a": "asdf",
"b": 1234,
"c": "a",
"d": "str",
"e": 1234
},
{
"a": "asdf",
"b": 1234,
"c": "a",
"d": "str",
"e": 1234
}
]
}
- Прочитайте файл
scala> val nested = spark.read.option("multiline",true).json("nested.json")
nested: org.apache.spark.sql.DataFrame = [array: array<struct<a:string,b:bigint,c:string,d:string,e:bigint>>]
- Проверьте схему
scala> nested.printSchema
root
|-- array: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- a: string (nullable = true)
| | |-- b: long (nullable = true)
| | |-- c: string (nullable = true)
| | |-- d: string (nullable = true)
| | |-- e: long (nullable = true)
- Используйте функцию explode, чтобы развернуть массив в отдельные строки
scala> nested.select(explode($"array").as("exploded")).select("exploded.*").show
+----+----+---+---+----+
| a| b| c| d| e|
+----+----+---+---+----+
|asdf|1234| a|str|1234|
|asdf|1234| a|str|1234|
|asdf|1234| a|str|1234|
+----+----+---+---+----+