Этого можно добиться с помощью функций из модуля pyspark.sql.functions (regexp_replace, concat, lit):
from pyspark.sql import SparkSession
import pyspark.sql.functions as F
# Obtain a SparkSession named "test" with Hive support enabled
# (getOrCreate reuses an already-running session if there is one).
sqlContext = (
    SparkSession.builder
    .appName("test")
    .enableHiveSupport()
    .getOrCreate()
)

# Three sample rows for the columns '_1', '_2' and '_3'.
data = [
    ('x1-y1', 3, 'z1'),
    ('x2-y2', 2, 'z2'),
    ('x3-y3', 1, 'z3'),
]
test_df = sqlContext.createDataFrame(data, schema=['_1', '_2', '_3'])

# Column expressions derived from '_1':
#   dash_removed   -> every '-' deleted
#   dash_to_equals -> every '-' replaced by '='
dash_removed = F.regexp_replace('_1', '-', '')
dash_to_equals = F.regexp_replace('_1', '-', '=')

# '_4' is '_1' without dashes; '_5' is the '='-joined form of '_1'
# followed by a literal '=' and the value of '_3'.
test_df = (
    test_df
    .withColumn('_4', dash_removed)
    .withColumn('_5', F.concat(dash_to_equals, F.lit('='), F.col('_3')))
)
test_df.show()
+-----+---+---+----+--------+
|   _1| _2| _3|  _4|      _5|
+-----+---+---+----+--------+
|x1-y1|  3| z1|x1y1|x1=y1=z1|
|x2-y2|  2| z2|x2y2|x2=y2=z2|
|x3-y3|  1| z3|x3y3|x3=y3=z3|
+-----+---+---+----+--------+