Это потому, что поля class, class_timestamps и sentence пусты. Spark не может вывести схему, не зная, какие данные находятся внутри. Поэтому, чтобы получить полную схему, нужно предоставить хотя бы одну полностью заполненную строку JSON.
// Fully populated sample record: both "class" and "class_timestamps" contain
// at least one field, so their nested structure is visible in the data.
// The JSON lines stay at column 0 on purpose: indenting them would change the string.
val stringTest1: String =
  """{
"total_count": 123,
"page_size": 20,
"another_id": "gdbfdbfdbd",
"sen": [{
"id": 123,
"ses_id": 12424343,
"columns": {
"blah": "blah",
"count": 1234
},
"class": {"name":"className"},
"class_timestamps": {"timestamp1" : 1234},
"sentence": "spark is good"
}]
}
"""
// Sparse sample record: "class" and "class_timestamps" are empty objects,
// so this record by itself carries no information about their inner fields.
// The JSON lines stay at column 0 on purpose: indenting them would change the string.
val stringTest: String = """{
"total_count": 123,
"page_size": 20,
"another_id": "gdbfdbfdbd",
"sen": [{
"id": 123,
"ses_id": 12424343,
"columns": {
"blah": "blah",
"count": 1234
},
"class": {},
"class_timestamps": {},
"sentence": "spark is good"
}]
}
"""
import spark.implicits._
// Read the sparse and the fully populated record together, so the inferred
// schema can take the nested fields of "class" and "class_timestamps" from
// the record that actually has them.
val jsonSamples = Seq(stringTest, stringTest1).toDS()
val df = spark.read.json(jsonSamples)

// Print the inferred schema tree to stdout.
df.printSchema()
Полученная схема:
root
|-- another_id: string (nullable = true)
|-- page_size: long (nullable = true)
|-- sen: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- class: struct (nullable = true)
| | | |-- name: string (nullable = true)
| | |-- class_timestamps: struct (nullable = true)
| | | |-- timestamp1: long (nullable = true)
| | |-- columns: struct (nullable = true)
| | | |-- blah: string (nullable = true)
| | | |-- count: long (nullable = true)
| | |-- id: long (nullable = true)
| | |-- sentence: string (nullable = true)
| | |-- ses_id: long (nullable = true)
|-- total_count: long (nullable = true)