import json
# Sample input: two tab-separated records, each ending with a trailing tab.
# NOTE(review): `sc` is not defined in this snippet; presumably an existing
# SparkContext supplied by the environment. Confirm.
rdd = sc.parallelize(["a\tb\t", "a\tc\t"])
def to_json(x):
    """Serialize the first two tab-separated fields of *x* as a JSON object.

    Args:
        x: A tab-delimited string containing at least two fields.

    Returns:
        A JSON string of the form ``{"0": <first field>, "1": <second field>}``.

    Raises:
        IndexError: If *x* has fewer than two tab-separated fields.
    """
    # Split once and reuse the result instead of re-splitting for each field.
    fields = x.split('\t')
    # A dict is required here: a list cannot be indexed with string keys.
    res = {"0": fields[0], "1": fields[1]}
    return json.dumps(res)
# Apply to_json to every element and rebind rdd to the mapped result.
rdd = rdd.map(to_json)
# Is this the kind of API I should expect?
# How to write the RDD out as JSON: save it as text under output_path.
# NOTE(review): `output_path` is not defined in this snippet; it must be set
# beforehand. saveAsTextFile presumably writes one element per line. Confirm
# against the Spark documentation.
rdd.saveAsTextFile(output_path)
# How to read it back: load the saved text from the same path.
# NOTE(review): the loaded elements are presumably the raw text lines, not
# parsed objects, so decoding them would need json.loads. Confirm.
rdd_json = sc.textFile(output_path)