from pyspark.sql.functions import when,col
from pyspark.sql.functions import udf
#Your code here to create a new variable df_kmeans_new with a new column Position_Group,..
from pyspark.sql.types import *
#Your code to complete
# Position codes that check_in_def maps to the "DEF" label.
DEF = [
    "LB",
    "LWB",
    "RB",
    "LCB",
    "RCB",
    "CB",
    "RWB",
]

# Position codes that check_in_def maps to the "FWD" label.
FWD = [
    "RF",
    "LF",
    "LW",
    "RS",
    "RW",
    "LS",
    "CF",
    "ST",
]

# Position codes that check_in_def maps to the "MID" label.
MID = [
    "LCM",
    "LM",
    "RDM",
    "CAM",
    "RAM",
    "RCM",
    "CM",
    "CDM",
    "RM",
    "LAM",
    "LDM",
]
# Small sample frame: one row per player with an id, a position code and a
# string-valued Position_x column. `spark` must already be an active session.
df = spark.createDataFrame(
    [
        (1, "LB", "4"),
        (2, "LM", "0"),
        (3, "LCB", "4"),
        (4, "RS", "4"),
    ],
    schema=("id", "Position", "Position_x"),
)
def check_in_def(cell_val):
    """Return the group label for a single position code.

    The module-level lists are consulted in the order DEF, FWD, MID and the
    label of the first list containing ``cell_val`` is returned.

    Args:
        cell_val: A position code such as ``"LB"`` or ``"ST"``.

    Returns:
        ``"DEF"``, ``"FWD"`` or ``"MID"`` for a known code, ``"NA"`` otherwise.

    NOTE: this works on plain Python values. A Spark ``Column`` cannot be
    tested with ``in``; wrap this function in a ``udf`` or use
    ``Column.isin`` to apply the same mapping to a DataFrame column.
    """
    labelled_groups = (("DEF", DEF), ("FWD", FWD), ("MID", MID))
    for label, members in labelled_groups:
        if cell_val in members:
            return label
    return "NA"
# Add Position_Group by testing the Position column against each list.
# Column.isin builds a Spark expression, so no Python function is called on
# the Column itself (doing that raises "Cannot convert column into bool").
# Codes found in none of the lists get "NA", matching check_in_def and
# keeping the new column a string column.
df = df.withColumn(
    "Position_Group",
    when(col("Position").isin(DEF), "DEF")
    .when(col("Position").isin(FWD), "FWD")
    .when(col("Position").isin(MID), "MID")
    .otherwise("NA"),
)
# show() only prints and returns None, so it must not be chained into the
# assignment above or df would be overwritten with None.
df.show()
Я хочу создать в df новый столбец, который будет содержать имя одного из трёх списков (DEF, FWD или MID) в зависимости от того, в каком из них найдено значение столбца Position.
Но код не работает... Пожалуйста, помогите!