Сначала определите пользовательскую функцию (UDF) и примените её к столбцу:
from pyspark.sql import functions as F
from pyspark.sql.types import StringType
def dictonnary(x, frequent=None):
    """Return *x* if it is a frequent value, otherwise the label ``"rare"``.

    Args:
        x: Value to classify.
        frequent: Optional container of values treated as frequent. When
            omitted, the module-level ``frequent_ls`` is used.

    Returns:
        ``x`` unchanged when it is found in the frequent container,
        otherwise the string ``"rare"``.
    """
    if frequent is None:
        # Fall back to the global list so existing one-argument callers
        # keep working; an explicitly passed empty container is respected.
        frequent = frequent_ls
    return x if x in frequent else "rare"
# Wrap the classifier as a Spark UDF that returns string values.
# NOTE(review): a native expression such as
#   F.when(F.col("var").isin(frequent_ls), F.col("var")).otherwise("rare")
# would likely avoid Python UDF overhead — confirm null handling before switching.
replace = F.udf(dictonnary, StringType())

# Add column "var2" holding the UDF result computed from column "var".
Xtrain = xtrain.withColumn("var2", replace(F.col("var")))