Ниже приведена схема данных, из которой мы хотим извлечь поле `from`. Я попытался использовать `df3 = df.select(df.transcript.data.from.alias("Type"))`, но получил синтаксическую ошибку (invalid syntax).
Как правильно извлечь значение поля `transcript.data.from`?
root
|-- contactId: long (nullable = true)
|-- mediaLegId: string (nullable = true)
|-- transcript: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- action: string (nullable = true)
| | |-- data: struct (nullable = true)
| | | |-- chatId: string (nullable = true)
| | | |-- customerInfo: struct (nullable = true)
| | | | |-- customerIdentifierToken: string (nullable = true)
| | | | |-- customerIdentifierType: string (nullable = true)
| | | | |-- customerName: string (nullable = true)
| | | | |-- initialQuestion: string (nullable = true)
| | | |-- entryPoint: string (nullable = true)
| | | |-- from: string (nullable = true)
| | | |-- lang: string (nullable = true)
| | | |-- parkDuration: long (nullable = true)
| | | |-- parkNote: string (nullable = true)
| | | |-- participant: struct (nullable = true)
| | | | |-- disconnectReason: string (nullable = true)
| | | | |-- displayName: string (nullable = true)
| | | | |-- participantId: string (nullable = true)
| | | | |-- preferences: struct (nullable = true)
| | | | | |-- language: string (nullable = true)
| | | | |-- state: string (nullable = true)
| | | | |-- userName: string (nullable = true)
| | | |-- reconnected: boolean (nullable = true)
| | | |-- relatedData: string (nullable = true)
| | | |-- text: string (nullable = true)
| | | |-- timestamp: long (nullable = true)
| | | |-- transcriptText: string (nullable = true)
| | | |-- transferNote: string (nullable = true)
| | | |-- transcriptText: string (nullable = true)
| | | |-- transferNote: string (nullable = true)