Это можно сделать с помощью Spark SQL, как показано ниже:
Spark SQL
# Expose the DataFrame under the name "data" so Spark SQL can query it.
data.createOrReplaceTempView("data")

# Keep a and b as-is; pack columns a..e into a named struct and serialize
# that struct to a JSON string, returned under the column name c.
json_query = """
select a, b, to_json(named_struct('a',a, 'b',b,'c',c,'d',d,'e',e)) as c
from data"""

# Print up to 20 rows without truncating the (long) JSON strings.
spark.sql(json_query).show(n=20, truncate=False)
Вывод
# +---+---+----------------------------------------+
# |a |b |c |
# +---+---+----------------------------------------+
# |1 |a |{"a":1,"b":"a","c":"foo1","d":"4","e":5}|
# |2 |b |{"a":2,"b":"b","c":"bar","d":"4","e":6} |
# |3 |c |{"a":3,"b":"c","c":"mnc","d":"4","e":7} |
# |4 |c |{"a":4,"b":"c","c":"mnc","d":"4","e":7} |
# +---+---+----------------------------------------+
DataFrame API
# Column expression: bundle columns a..e into a struct and serialize it to JSON.
json_col = to_json(struct(data.a, data.b, data.c, data.d, data.e))

# Overwrite column c with the JSON string, then keep only a, b and c.
result = (
    data
    .withColumn('c', json_col)
    .select("a", "b", "c")
)

# Print up to 20 rows without truncating the (long) JSON strings.
result.show(n=20, truncate=False)
Вывод
# +---+---+----------------------------------------+
# |a |b |c |
# +---+---+----------------------------------------+
# |1 |a |{"a":1,"b":"a","c":"foo1","d":"4","e":5}|
# |2 |b |{"a":2,"b":"b","c":"bar","d":"4","e":6} |
# |3 |c |{"a":3,"b":"c","c":"mnc","d":"4","e":7} |
# |4 |c |{"a":4,"b":"c","c":"mnc","d":"4","e":7} |
# +---+---+----------------------------------------+