У меня есть многострочный JSON-файл. Когда я вызываю df.dtypes для DataFrame, загруженного из этого файла, я получаю следующий список кортежей:
[('bundleId', 'string'), ('id', 'string'), ('identities','struct<applicationIdentities:array<struct<isNetworkAuthenticated:boolean,type:string,userProfile:struct<civilite:string,idAsc:string,logins:array<string>,nom:string,prenom:string,ts:string>,value:string>>,networkIdentities:array<string>>'),('request','struct<application:struct<id:string,pushConnector:string,pushSettings:bigint,pushToken:string,versionCode:bigint,versionName:string>,device:struct<freeSpaceInMb:bigint,id:string,lastReboot:string,manufacturer:string,name:string,ramInMb:bigint,totalSpaceInMb:bigint,version:string>,network:struct<bearer:string,ssid:string>,os:struct<name:string,versionCode:bigint,versionName:string>,sessions:array<struct<application:struct<id:string,versionCode:bigint,versionName:string>,tags:array<struct<key:string,kv:struct<geolocation:string,login:string,npvr:string,rmcsport.offer:string,rmcsport.status:string,startover:string,status:string>,ts:string,type:string,value:string>>,trigger:string,ts:string,type:string>>,ts:string>'), ('service', 'string'), ('ts', 'string'), ('version', 'bigint')]
Я хочу преобразовать его в DDL-схему, например такую:
CREATE EXTERNAL TABLE json_test.test (
'maintainer' STRING COMMENT '',
'docs_url' NULL COMMENT '',
'requires_python' STRING COMMENT '',
'maintainer_email' STRING COMMENT '',
'cheesecake_code_kwalitee_id' NULL COMMENT '',
'keywords' STRING COMMENT '',
'upload_time' STRING COMMENT '',
'requirements' ARRAY<STRUCT<
'name': STRING COMMENT '',
'specs': ARRAY<ARRAY> COMMENT '',
> COMMENT '',
> COMMENT '',
'author' STRING COMMENT '',
'home_page' STRING COMMENT '',
'github_user' STRING COMMENT '',
'download_url' STRING COMMENT '',
'platform' STRING COMMENT '',
'version' STRING COMMENT '',
'cheesecake_documentation_id' NULL COMMENT '',
'description' STRING COMMENT '',
'lcname' STRING COMMENT '',
'bugtrack_url' NULL COMMENT '',
'github' BOOLEAN COMMENT '',
'coveralls' BOOLEAN COMMENT '',
'name' STRING COMMENT '',
'license' STRING COMMENT '',
'travis_ci' BOOLEAN COMMENT '',
'github_project' STRING COMMENT '',
'summary' STRING COMMENT '',
'split_keywords' ARRAY<STRING> COMMENT '',
'author_email' STRING COMMENT '',
'urls' ARRAY<STRUCT<
'has_sig': BOOLEAN COMMENT '',
'upload_time': STRING COMMENT '',
'comment_text': STRING COMMENT '',
'python_version': STRING COMMENT '',
'url': STRING COMMENT '',
'md5_digest': STRING COMMENT '',
'downloads': INTEGER COMMENT '',
'filename': STRING COMMENT '',
'packagetype': STRING COMMENT '',
'path': STRING COMMENT '',
'size': INTEGER COMMENT '',
> COMMENT '',
> COMMENT '',
'_id' NULL COMMENT '',
'cheesecake_installability_id' NULL COMMENT '',
)
COMMENT ''
PARTITIONED BY (part_date STRING)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
STORED AS TEXTFILE
LOCATION ''
Я не понимаю, как извлечь ключи, вложенные в типы внутри кортежей (struct и array). Если у кого-то есть идея, поделитесь, пожалуйста.
Спасибо,
Лили