Если вы хотите создать JSON-объект внутри DataFrame, используйте сочетание функций collect_list + create_map + to_json.
(или)
Чтобы записать данные в файл как JSON-документ, не используйте to_json; вместо этого используйте .write.json().
Create JSON object:
# Turn every row into a {"product": ..., "cost": ...} map, gather all maps into
# one array column named "stru", then serialise that array to a JSON string.
(
    df.agg(
        collect_list(
            create_map(lit("product"), "product", lit("cost"), "cost")
        ).alias("stru")
    )
    .selectExpr("to_json(stru) as json")
    # Second positional argument disables truncation so the whole string prints.
    .show(10, False)
)
#+-------------------------------------------------------------------------------------------------------------------------------+
#|json |
#+-------------------------------------------------------------------------------------------------------------------------------+
#|[{"product":"pen","cost":"10"},{"product":"book","cost":"40"},{"product":"bottle","cost":"80"},{"product":"glass","cost":"55"}]|
#+-------------------------------------------------------------------------------------------------------------------------------+
# Write the JSON string out (e.g. to HDFS) as plain text via .saveAsTextFile:
# build the same single-row "json" column, pull the bare string out of each Row,
# and save it so the output file holds only the JSON array.
(
    df.agg(
        collect_list(
            create_map(lit("product"), "product", lit("cost"), "cost")
        ).alias("stru")
    )
    .selectExpr("to_json(stru) as json")
    .rdd.map(lambda row: row["json"])
    .saveAsTextFile("<path>")
)
#cat part-00000
#[{"product":"pen","cost":"10"},{"product":"book","cost":"40"},{"product":"bottle","cost":"80"},{"product":"glass","cost":"55"}]
Create JSON file:
# No to_json here: the JSON writer serialises the aggregated "stru" column
# itself, producing an object of the form {"stru": [...]}; existing output at
# the target path is overwritten.
(
    df.agg(
        collect_list(
            create_map(lit("product"), "product", lit("cost"), "cost")
        ).alias("stru")
    )
    .write.mode("overwrite")
    .json("<path>")
)
#cat part-00000-3a19165e-219e-4485-adb8-ef91589d6e31-c000.json
#{"stru":[{"product":"pen","cost":"10"},{"product":"book","cost":"40"},{"product":"bottle","cost":"80"},{"product":"glass","cost":"55"}]}