scala> df.show(truncate = false) // print the input rows without cutting off long `genres` strings
+------+--------------------------------------------------------------------------------------+
|MainId|genres |
+------+--------------------------------------------------------------------------------------+
|862 |[{'id':16,'name':'Animation'},{'id':35,'name':'Comedy'},{'id':10751,'name':'Family'}] |
|8844 |[{'id':12,'name':'Adventure'},{'id':14,'name':'Fantasy'},{'id':10751,'name':'Family'}]|
|15602 |[{'id':10749,'name':'Romance'},{'id':35,'name':'Comedy'}] |
|31357 |[{'id':35,'name':'Comedy'},{'id':18,'name':'Drama'},{'id':10749,'name':'Romance'}] |
|11862 |[{'id':35,'name':'Comedy'}] |
+------+--------------------------------------------------------------------------------------+
scala> val df1 = {
         // Flattens the JSON-like `genres` string into one row per key/value token:
         //   genres  - the raw "key:value" token, e.g. "id:16"
         //   columns - the key part of the token ("id" / "name" in the sample data)
         //   value   - the value part of the token, kept as a string
         // The chain is wrapped in a block so the shell reads it as a single expression.
         //
         // Whitespace around the "," and ":" separators is tolerated, so input written as
         // "{'id': 16, 'name': 'Animation'}" still yields the keys "id"/"name" and the
         // values "16"/"Animation" instead of " name" / " 16".
         // NOTE(review): values that themselves contain "," or ":" are still split at
         // those characters; this text-based approach assumes they never occur.
         val withoutPunctuation = regexp_replace(col("genres"), "[\\[\\]{}']", "")
         // Drop whitespace left at either end once brackets, braces and quotes are gone.
         val cleaned = regexp_replace(withoutPunctuation, "^\\s+|\\s+$", "")
         // Resolved lazily against the exploded `genres` column: "id:16" -> ["id", "16"].
         val keyValue = split(col("genres"), "\\s*:\\s*")

         df.withColumn("genres", cleaned)
           .withColumn("genres", explode(split(col("genres"), "\\s*,\\s*")))
           .withColumn("columns", keyValue(0))
           .withColumn("value", keyValue(1))
       }
scala> df1.show(truncate = false) // print the flattened key/value rows without truncating cells
+------+--------------+-------+---------+
|MainId|genres |columns|value |
+------+--------------+-------+---------+
|862 |id:16 |id |16 |
|862 |name:Animation|name |Animation|
|862 |id:35 |id |35 |
|862 |name:Comedy |name |Comedy |
|862 |id:10751 |id |10751 |
|862 |name:Family |name |Family |
|8844 |id:12 |id |12 |
|8844 |name:Adventure|name |Adventure|
|8844 |id:14 |id |14 |
|8844 |name:Fantasy |name |Fantasy |
|8844 |id:10751 |id |10751 |
|8844 |name:Family |name |Family |
|15602 |id:10749 |id |10749 |
|15602 |name:Romance |name |Romance |
|15602 |id:35 |id |35 |
|15602 |name:Comedy |name |Comedy |
|31357 |id:35 |id |35 |
|31357 |name:Comedy |name |Comedy |
|31357 |id:18 |id |18 |
|31357 |name:Drama |name |Drama |
+------+--------------+-------+---------+
only showing top 20 rows
scala> val df2 = {
         // Rebuilds one (id, name) struct per genre from the flattened key/value rows:
         //   1. pivot the key column so each MainId gets an `id` array and a `name` array,
         //   2. zip the two arrays element-wise,
         //   3. explode the zipped array into one row per genre (column `json`).
         // The chain is wrapped in a block so the shell reads it as a single expression.
         //
         // The pivot values are listed explicitly: Spark then skips the extra job that
         // discovers the distinct keys, and the `id` / `name` columns always exist, so
         // an unexpected key in the data cannot add columns or break the zip below.
         val genreKeys = Seq("id", "name")

         // NOTE(review): pairing ids with names relies on collect_list keeping the row
         // order within each MainId; Spark documents that order as non-deterministic
         // after a shuffle - confirm on real data before relying on it.
         df1.groupBy("MainId")
           .pivot("columns", genreKeys)
           .agg(collect_list(col("value")))
           .withColumn("json", explode(arrays_zip(col("id"), col("name"))))
           .select("MainId", "json")
       }
scala> df2.show() // default show(): prints at most 20 rows and truncates cells longer than 20 characters
+------+----------------+
|MainId| json|
+------+----------------+
| 862| [16, Animation]|
| 862| [35, Comedy]|
| 862| [10751, Family]|
| 15602|[10749, Romance]|
| 15602| [35, Comedy]|
| 11862| [35, Comedy]|
| 31357| [35, Comedy]|
| 31357| [18, Drama]|
| 31357|[10749, Romance]|
| 8844| [12, Adventure]|
| 8844| [14, Fantasy]|
| 8844| [10751, Family]|
+------+----------------+
scala> df2.select(col("MainId"), col("json.id"), col("json.name")).show() // unpack the zipped struct into flat `id` / `name` columns
+------+-----+---------+
|MainId| id| name|
+------+-----+---------+
| 862| 16|Animation|
| 862| 35| Comedy|
| 862|10751| Family|
| 15602|10749| Romance|
| 15602| 35| Comedy|
| 11862| 35| Comedy|
| 31357| 35| Comedy|
| 31357| 18| Drama|
| 31357|10749| Romance|
| 8844| 12|Adventure|
| 8844| 14| Fantasy|
| 8844|10751| Family|
+------+-----+---------+