Приведённое ниже регулярное выражение также работает:
scala> val df = Seq(("it_shampoo",5),
| ("it_books",5),
| ("it_mm",5),
| ("{it_mm}",5),
| ("it_books it_books",5),
| ("{=it_books} it_books",5)).toDF("itemType","count")
df: org.apache.spark.sql.DataFrame = [itemType: string, count: int]
scala> df.select( regexp_replace('itemtype,""".*\b(\S+)\b(.*)$""", "$1").as("replaced"),'count).show
+----------+-----+
|  replaced|count|
+----------+-----+
|it_shampoo|    5|
|  it_books|    5|
|     it_mm|    5|
|     it_mm|    5|
|  it_books|    5|
|  it_books|    5|
+----------+-----+
scala>