It is easier to compute the word counts directly from the source DataFrame.
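For a self-contained run, assume words is a list of (date, word-list) tuples matching the output shown below (this sample data is reconstructed from that output, not from your actual source):

words = [
    ('2020-07-01', []),
    ('2020-07-01', ['test']),
    ('2020-07-01', ['test', 'template']),
    ('2020-07-01', ['aaa', 'bbb']),
    ('2020-07-01', ['test', 'ccc']),
]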
from pyspark.sql.functions import col, explode, count, collect_list, map_from_arrays

# Build a DataFrame with a date column and an array-of-words column.
df = spark.createDataFrame(words).toDF('createDate', 'nounwords')
df.show(10, False)
+----------+----------------+
|createDate|nounwords |
+----------+----------------+
|2020-07-01|[] |
|2020-07-01|[test] |
|2020-07-01|[test, template]|
|2020-07-01|[aaa, bbb] |
|2020-07-01|[test, ccc] |
+----------+----------------+
# Explode each array into one row per word, then count the occurrences
# of every word per date.
df2 = df.select('createDate', explode('nounwords').alias('nounwords')) \
    .groupBy('createDate', 'nounwords') \
    .agg(count(col('nounwords')).alias('count'))
df2.show(10, False)
+----------+---------+-----+
|createDate|nounwords|count|
+----------+---------+-----+
|2020-07-01|bbb |1 |
|2020-07-01|template |1 |
|2020-07-01|test |3 |
|2020-07-01|ccc |1 |
|2020-07-01|aaa |1 |
+----------+---------+-----+
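Note that explode() silently drops rows whose array is empty, which is why the [] row from the first DataFrame contributes nothing here. If you need to keep dates that only have empty arrays, explode_outer() is a possible variant; a minimal sketch:

from pyspark.sql.functions import explode_outer

# explode_outer keeps rows with empty arrays as a single row whose
# nounwords value is null, so the date survives into later steps.
df2b = df.select('createDate', explode_outer('nounwords').alias('nounwords'))
# Null words can then be filtered out or handled separately as needed.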
# Re-aggregate per date: collect the words and their counts into two
# parallel arrays, then zip them into a single map column.
df3 = df2.groupBy('createDate') \
    .agg(collect_list(col('nounwords')).alias('nounwords'), collect_list(col('count')).alias('count')) \
    .withColumn('map', map_from_arrays(col('nounwords'), col('count'))) \
    .drop('nounwords', 'count')
df3.show(10, False)
+----------+--------------------------------------------------------+
|createDate|map |
+----------+--------------------------------------------------------+
|2020-07-01|[bbb -> 1, template -> 1, test -> 3, ccc -> 1, aaa -> 1]|
+----------+--------------------------------------------------------+
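One caveat: this approach relies on the two collect_list calls seeing the rows in the same order so that the word and count arrays stay aligned. If you want to make the pairing explicit, a sketch of a variant that collects (word, count) structs and builds the map with map_from_entries (available since Spark 2.4):

from pyspark.sql.functions import struct, map_from_entries

# Each struct carries its word and count together, so alignment is
# guaranteed regardless of row ordering within the group.
df3b = df2.groupBy('createDate') \
    .agg(map_from_entries(collect_list(struct('nounwords', 'count'))).alias('map'))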