Вы можете использовать expr:
from pyspark.sql import functions
# Demo data: each row is [Fname, MiddleName, Expected_FName].
# The second row deliberately has a null MiddleName.
df = spark.createDataFrame(
    [
        'John D Hoye,D,John Hoye'.split(','),
        ['John D Hoye', None, 'John D Hoye'],
        'John D-Doe Hoye,D-Doe,John Hoye'.split(','),
        'John D,D,John'.split(','),
        'D John,D,John'.split(','),
        'John Doe Hoy,Doe Hoy,John'.split(','),
    ],
    'Fname,MiddleName,Expected_FName'.split(','),
)

# Going through expr() lets the SQL regexp_replace take its pattern from
# another column (MiddleName) of the same row instead of a fixed literal.
# NOTE: MiddleName is used as a regular-expression pattern, a null MiddleName
# produces a null result, and the whitespace around the removed part is kept.
res = df.withColumn(
    'expected',
    functions.expr("regexp_replace(Fname, MiddleName, '')"),
)
res.show()
+---------------+----------+--------------+----------+
|          Fname|MiddleName|Expected_FName|  expected|
+---------------+----------+--------------+----------+
|    John D Hoye|         D|     John Hoye|John  Hoye|
|    John D Hoye|      null|   John D Hoye|      null|
|John D-Doe Hoye|     D-Doe|     John Hoye|John  Hoye|
|         John D|         D|          John|     John |
|         D John|         D|          John|      John|
|   John Doe Hoy|   Doe Hoy|          John|     John |
+---------------+----------+--------------+----------+