You need to list every required column in .select; only those columns end up in the result, and the subsequent .withColumn calls then operate on them.
Example:
df1 = spark.createDataFrame(
    [("a", "1", "4", "t"), ("b", "2", "5", "v"), ("c", "3", "6", "v")],
    ["col1", "col2", "col3", "col4"])
df2 = spark.createDataFrame(
    [("a", "1", "4", "ord2"), ("b", "2", "5", "ord1"), ("c", "3", "6", "ord3")],
    ["col1", "col2", "col3", "col4"])

# Select every column the later expressions need. Two of the selected
# columns are both named col2, so withColumn("col2", ...) replaces both.
(df1.join(df2, df1.col1 == df2.col1, "inner")
    .select(df1.col1, df2.col2, df1.col3, df1.col2, df2.col4)
    .withColumn("col3", (df1.col3 / df2.col2).cast("double"))
    .withColumn("col2", (df1.col2 + df2.col2).cast("int"))
    .show())
#+----+----+----+----+----+
#|col1|col2|col3|col2|col4|
#+----+----+----+----+----+
#|   a|   2| 4.0|   2|ord2|
#|   b|   4| 2.5|   4|ord1|
#|   c|   6| 2.0|   6|ord3|
#+----+----+----+----+----+
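Because the select keeps two columns both named col2, the withColumn("col2", ...) call rewrites both of them, which is why the sum appears twice in the output. If you want to keep them apart, you can alias one of the duplicates in .select and refer to the selected names afterwards. A minimal sketch, assuming the same df1/df2 as above; col2_b is a name introduced here purely for illustration:

from pyspark.sql import functions as F

(df1.join(df2, df1.col1 == df2.col1, "inner")
    .select(df1.col1,
            df2.col2,
            df1.col3,
            df1.col2.alias("col2_b"),  # hypothetical name, disambiguates the duplicate
            df2.col4)
    # refer to the selected names: the alias gives df1.col2 a new identity,
    # so the original df1.col2 reference is no longer resolvable here
    .withColumn("col3", (F.col("col3") / F.col("col2")).cast("double"))
    .withColumn("col2", (F.col("col2_b") + F.col("col2")).cast("int"))
    .show())

With the alias in place, col2 in the output holds only the sum, while col2_b still carries the original value from df1.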