Попробуйте это:
Пример данных:
# Build a small demo DataFrame: (row_id, tot_reduced_load).
sample_rows = [(1, 30), (2, 40), (3, 60)]
df = spark.createDataFrame(sample_rows, ['row_id', 'tot_reduced_load'])
df.show()
#+------+----------------+
#|row_id|tot_reduced_load|
#+------+----------------+
#| 1| 30|
#| 2| 40|
#| 3| 60|
#+------+----------------+
Опция 1: withColumn
from pyspark.sql import functions as psf

# Cap the load at 50: values above 50 are clamped to 50, everything else
# (including NULLs) passes through unchanged via otherwise().
capped = (
    psf.when(psf.col("tot_reduced_load") > 50, 50)
       .otherwise(psf.col("tot_reduced_load"))
)
df.withColumn("tot_reduced_load_new", capped).show()
#+------+----------------+--------------------+
#|row_id|tot_reduced_load|tot_reduced_load_new|
#+------+----------------+--------------------+
#| 1| 30| 30|
#| 2| 40| 40|
#| 3| 60| 50|
#+------+----------------+--------------------+
Опция 2: selectExpr
# Same capping rule, expressed as a SQL CASE expression on top of all columns.
cap_expr = "CASE WHEN tot_reduced_load > 50 THEN 50 ELSE tot_reduced_load END AS tot_reduced_load_new"
df.selectExpr("*", cap_expr).show()
#+------+----------------+--------------------+
#|row_id|tot_reduced_load|tot_reduced_load_new|
#+------+----------------+--------------------+
#| 1| 30| 30|
#| 2| 40| 40|
#| 3| 60| 50|
#+------+----------------+--------------------+