Как записать вывод во вложенном JSON, используя PySpark в AWS Glue - PullRequest
0 голосов
/ 16 июня 2020

Я хочу записать вывод во вложенном JSON, используя PySpark в AWS Glue. Я выполнил следующие шаги:

Я использовал в AWS Glue приведённый ниже код PySpark:

# Select and type the columns of the joined frame. Per the AWS Glue
# ApplyMapping documentation, each tuple is
# (source name, source type, target name, target type).
# NOTE(review): "suffix" is written out as "suffixe" — possibly a typo; confirm.
applymapping1 = ApplyMapping.apply(
    frame=dynJoin,
    mappings=[
        ("patientid", "decimal(19,0)", "patientid", "decimal(19,0)"),
        ("last_name", "string", "last_name", "string"),
        ("first_name", "string", "first_name", "string"),
        ("middle_name", "string", "middle_name", "string"),
        ("prefix", "string", "prefix", "string"),
        ("suffix", "string", "suffixe", "string"),
        ("street_address_1", "string", "street_address_1", "string"),
        ("street_address_2", "string", "street_address_2", "string"),
        ("zip", "string", "zip", "string"),
        ("city", "string", "city", "string"),
        ("country", "string", "country", "string"),
        ("group_name", "string", "group_name", "string"),
        ("group_id", "string", "group_id", "string"),
        ("current_member_id", "decimal(19,0)", "current_member_id", "decimal(19,0)"),
    ],
    transformation_ctx="applymapping1",
)

def MergeAddress(rec):
  """Nest the flat address columns of one record under an "Address" field.

  The five address fields are removed from the top level of ``rec`` and
  stored, under their original names, in a new ``rec["Address"]`` mapping,
  so the address values are kept instead of being discarded.
  A field that is absent from the record is stored as ``None`` rather than
  raising ``KeyError``.

  NOTE(review): this works on a single record. Collecting several addresses
  of the same patient into one list presumably needs a separate grouping
  step outside this function — confirm against the desired output.

  Args:
    rec: A mutable, dict-like record (assumed to support ``pop``); it is
      modified in place.

  Returns:
    The same ``rec`` object with the address fields moved under "Address".
  """
  address_fields = (
    "street_address_1",
    "street_address_2",
    "zip",
    "city",
    "country",
  )
  # pop() reads and removes each field in one step; the None default keeps
  # records with missing address columns from failing.
  rec["Address"] = {field: rec.pop(field, None) for field in address_fields}
  return rec

# Run MergeAddress over the mapped frame via the Glue Map transform.
mapped_dyF = Map.apply(
    frame=applymapping1,
    f=MergeAddress,
)

Текущий вывод:

{"patientid":8002,"Address":{"Array":["18 Orchard Avenue",null,"19001","Abington",null]},"group_id":"OLRX","group_name":"OLR Executive","current_member_id":1000434787}
{"patientid":8001,"Address":{"Array":["333 Oak Street",null,"34801","Bradenton",null]},"group_id":"OLRX","group_name":"OLR Executive","current_member_id":1222333444}
{"patientid":8001,"Address":{"Array":["102 North Main Street","Suite 41","32801","Orlando",null]},"group_id":"OLRX","group_name":"OLR Executive","current_member_id":1222333444}
{"patientid":8003,"Address":{"Array":[null,null,null,null,null]},"group_id":"OLRX","group_name":"OLR Executive","current_member_id":12288889444}

Однако вывод должен быть в следующем формате:

{"patientid":8001,
"Address":
[{"street_address_1":"333 Oak Street","street_address_2":null,"zip":"34801","city":"Bradenton","country":null},
{"street_address_1":"102 North Main Street","street_address_2":"Suite 41","zip":"32801","city":"Orlando","country":null}
]
,"group_id":"OLRX","group_name":"OLR Executive","current_member_id":1222333444
}
...