Такой подход должен работать.
// Label each row with one item: "Pencil" when has_pencil is "true", otherwise
// "Pen" when has_pen is "true". Rows matching neither branch get a null label
// (a `when` chain without `otherwise` yields null), so they contribute nothing
// to the per-id set built by collect_set below.
val groupedDF = df
  .withColumn(
    "item",
    when(col("has_pencil").equalTo("true"), "Pencil")
      .when(col("has_pen").equalTo("true"), "Pen")
  )
  .groupBy("id")
  .agg(collect_set(col("item")).as("has_items"))

// Attach the aggregated `has_items` column back onto every original row by id.
val res = df.join(groupedDF, Seq("id"))