Пожалуйста, проверьте, поможет ли вам следующее решение:
1. Загрузите данные
// Sample event as one JSON string; "payload" holds a nested "questionnaire" object.
val rawJson = """{"Id":"31279605299","Type":"12121212","client":"Checklist _API","eventTime":"2020-03-17T15:50:30.640Z","eventType":"Event","payload":{"sourceApp":"ios","questionnaire":{"version":"1.0","question":"How to resolve ? ","fb":"Na"}}} """
// Wrap the string in a one-row DataFrame whose single string column is named "jsonCol".
// NOTE(review): toDF on a Seq presumably needs `import spark.implicits._` in scope — confirm.
val df = Seq(rawJson).toDF("jsonCol")
// Print the row without truncating the long JSON value, then print the schema.
df.show(truncate = false)
df.printSchema()
Output-
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|jsonCol |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|{"Id":"31279605299","Type":"12121212","client":"Checklist _API","eventTime":"2020-03-17T15:50:30.640Z","eventType":"Event","payload":{"sourceApp":"ios","questionnaire":{"version":"1.0","question":"How to resolve ? ","fb":"Na"}}} |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
root
|-- jsonCol: string (nullable = true)
2. Извлеките поля верхнего уровня из строки JSON в отдельные столбцы с помощью json_tuple(..)
// Top-level JSON keys to extract; json_tuple emits one column per key, in this order
// (the columns come out named c0..c5, as the printed table shows).
val topLevelKeys = Seq("Id", "Type", "client", "eventTime", "eventType", "payload")
val extracted = df.select(json_tuple(col("jsonCol"), topLevelKeys: _*))
// Print without truncation so the nested "payload" JSON stays fully visible.
extracted.show(truncate = false)
Output-
+-----------+--------+--------------+------------------------+-----+----------------------------------------------------------------------------------------------+
|c0 |c1 |c2 |c3 |c4 |c5 |
+-----------+--------+--------------+------------------------+-----+----------------------------------------------------------------------------------------------+
|31279605299|12121212|Checklist _API|2020-03-17T15:50:30.640Z|Event|{"sourceApp":"ios","questionnaire":{"version":"1.0","question":"How to resolve ? ","fb":"Na"}}|
+-----------+--------+--------------+------------------------+-----+----------------------------------------------------------------------------------------------+
3. Разберите строку JSON в структуру с помощью from_json(..)
// DDL description of the JSON document, including the nested payload/questionnaire structs.
val eventSchemaDdl =
  "struct<Id:string,Type:string,client:string,eventTime:string, eventType:string," +
    "payload:struct<questionnaire:struct<fb:string,question:string,version:string>,sourceApp:string>>"
// Parse the string column into a single struct column named "json_converted".
val parsedColumn = expr(s"from_json(jsonCol, '$eventSchemaDdl')").as("json_converted")
val processed = df.select(parsedColumn)
// Print the parsed row without truncation, then the resulting nested schema.
processed.show(truncate = false)
processed.printSchema()
Output-
+-------------------------------------------------------------------------------------------------------------+
|json_converted |
+-------------------------------------------------------------------------------------------------------------+
|[31279605299, 12121212, Checklist _API, 2020-03-17T15:50:30.640Z, Event, [[Na, How to resolve ? , 1.0], ios]]|
+-------------------------------------------------------------------------------------------------------------+
root
|-- json_converted: struct (nullable = true)
| |-- Id: string (nullable = true)
| |-- Type: string (nullable = true)
| |-- client: string (nullable = true)
| |-- eventTime: string (nullable = true)
| |-- eventType: string (nullable = true)
| |-- payload: struct (nullable = true)
| | |-- questionnaire: struct (nullable = true)
| | | |-- fb: string (nullable = true)
| | | |-- question: string (nullable = true)
| | | |-- version: string (nullable = true)
| | |-- sourceApp: string (nullable = true)