from pyspark.sql.functions import regexp_replace

# Strip the unwanted character from each column with regexp_replace
newDf = df.withColumn('_c0', regexp_replace('_c0', '@', '')) \
    .withColumn('_c1', regexp_replace('_c1', "'", '')) \
    .withColumn('_c2', regexp_replace('_c2', '!', ''))
newDf.show()
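If more columns need the same cleanup, a loop over the column names keeps the chain short. A minimal sketch, assuming the same df and that all stray characters can be collected into one regex character class (the column list and the [@'!] pattern here are illustrative):

from pyspark.sql.functions import regexp_replace

# Hypothetical generalization: strip @, ' and ! from every listed column in one pass
cleaned = df
for c in ['_c0', '_c1', '_c2']:
    cleaned = cleaned.withColumn(c, regexp_replace(c, "[@'!]", ''))
cleaned.show()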
Updated:
import org.apache.spark.sql.functions._
import spark.implicits._ // needed for .toDS() outside the spark-shell

// Note: the Seq holds a single string, so the Dataset has exactly one row
val df11 = Seq("'101-23','23-00-11','NOV-11-23','34-000-1111-1'").toDS()
df11.show()
// Alternative: keep everything before the first comma
// df11.select(substring_index(col("value"), ",", 1).as("b"))
// Keep only the first 10 characters (substring positions are 1-based; 0 behaves like 1)
val df111 = df11.withColumn("value", substring(df11("value"), 0, 10))
df111.show()
Result:
+--------------------+
| value|
+--------------------+
|'101-23','23-00-1...|
+--------------------+
+----------+
| value|
+----------+
|'101-23','|
+----------+
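The same truncation can be written in PySpark. A minimal sketch, assuming an active SparkSession named spark and the single-value input from the Scala snippet (the df11 name is carried over for illustration); substring_index is shown as the variant that takes everything before the first comma instead of a fixed 10 characters:

from pyspark.sql.functions import substring, substring_index

df11 = spark.createDataFrame(
    [("'101-23','23-00-11','NOV-11-23','34-000-1111-1'",)], ['value'])
df11.withColumn('value', substring('value', 1, 10)).show()         # first 10 characters
df11.withColumn('value', substring_index('value', ',', 1)).show()  # up to the first comma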