from pyspark.sql.functions import when, col
# Build a small demo DataFrame of four integer columns; col2 carries a few
# nulls so the conditional update further down has rows to skip.
rows = [
    (22, None, 23, 56),
    (12, 54, 22, 36),
    (48, None, 2, 45),
    (76, 32, 13, 6),
    (23, None, 43, 8),
    (67, 54, 56, 64),
    (16, 32, 32, 6),
    (3, 54, 64, 8),
    (67, 4, 23, 64),
]
df = sqlContext.createDataFrame(rows, ['col1', 'col2', 'col3', 'col4'])
df.show()
+----+----+----+----+
|col1|col2|col3|col4|
+----+----+----+----+
| 22|null| 23| 56|
| 12| 54| 22| 36|
| 48|null| 2| 45|
| 76| 32| 13| 6|
| 23|null| 43| 8|
| 67| 54| 56| 64|
| 16| 32| 32| 6|
| 3| 54| 64| 8|
| 67| 4| 23| 64|
+----+----+----+----+
# Overwrite col4 with col1 on rows where col1 exceeds col4 AND col2 is
# present; every other row (including the col2-null rows, where the AND
# evaluates false) keeps its original col4 value.
needs_bump = (col('col1') > col('col4')) & col('col2').isNotNull()
df = df.withColumn('col4', when(needs_bump, col('col1')).otherwise(col('col4')))
df.show()
+----+----+----+----+
|col1|col2|col3|col4|
+----+----+----+----+
| 22|null| 23| 56|
| 12| 54| 22| 36|
| 48|null| 2| 45|
| 76| 32| 13| 76|
| 23|null| 43| 8|
| 67| 54| 56| 67|
| 16| 32| 32| 16|
| 3| 54| 64| 8|
| 67| 4| 23| 67|
+----+----+----+----+