Вы можете использовать API DataFrame, чтобы сделать это несколькими способами. Вот один из них:
import org.apache.spark.sql.functions._
// Row and the schema types are not covered by the functions._ import, so bring
// them into scope explicitly; without these the snippet does not compile as pasted.
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType}

// Read the log file line by line and attach each line's position in the file,
// so that the filtered subsets built later can be sorted back into file order by "id".
val rd = sc.textFile("/FileStore/tables/log.txt").zipWithIndex.map { case (line, index) => Row(line, index) }

// Two non-nullable columns: the raw log line ("logs") and its line index ("id").
val schema = StructType(Seq(
  StructField("logs", StringType, nullable = false),
  StructField("id", LongType, nullable = false)
))

// Build the DataFrame straight from the session; going through spark.sqlContext
// only forwards to the same method.
val df = spark.createDataFrame(rd, schema)
df.show()
+--------------------+---+
| logs| id|
+--------------------+---+
|2018-10-15 05:24:...| 0|
| | 1|
|com.xyz.abc.pqr.e...| 2|
| at com.xyz.ab...| 3|
| at java.util....| 4|
| rContainer.do...| 5|
| at org.spring...| 6|
| at org.spring...| 7|
| at java.lang....| 8|
|Caused by: java.l...| 9|
| at com.xyz.ab...| 10|
| at com.xyz.ab...| 11|
| at com.xyz.ab...| 12|
| at java.util....| 13|
| | 14|
|2018-10-15 05:24:...| 15|
| | 16|
|com.xyz.abc.pqr.P...| 17|
// Header line of every log entry: keep only the lines emitted by the MQListener
// logger, drop everything from "ERROR" to the end of the line, and restore
// file order via the line index.
val df1 = {
  val listenerLines = df.filter(col("logs").contains("c.l.p.a.c.event.listener.MQListener"))
  val withoutErrorTail = listenerLines.withColumn("logs", regexp_replace(col("logs"), "ERROR.*", ""))
  withoutErrorTail.sort("id")
}
df1.show()
+--------------------+---+
| logs| id|
+--------------------+---+
|2018-10-15 05:24:...| 0|
|2018-10-15 05:24:...| 15|
+--------------------+---+
// Exception line of every log entry: keep only the PrescriptionNotValidException
// lines and strip the leading text up to and including "mandatory fields" plus
// the one character that follows it, then restore file order via the line index.
// NOTE(review): the trailing "." in the pattern is an unescaped regex wildcard,
// so it consumes any single character, not necessarily a literal dot.
val df2 = {
  val exceptionLines = df.filter(col("logs").contains("PrescriptionNotValidException:"))
  val fieldsOnly = exceptionLines.withColumn("logs", regexp_replace(col("logs"), "(.*?)mandatory fields.", ""))
  fieldsOnly.sort("id")
}
df2.show()
+--------------------+---+
| logs| id|
+--------------------+---+
| StoreId: 123, Co...| 2|
| StoreId: 234, Co...| 17|
+--------------------+---+
// Root-cause line of every log entry, in file order.
val df3 = {
  val causeLines = df.filter(col("logs").contains("Caused by: java.lang."))
  causeLines.sort("id")
}
df3.show()
// Bring the three "logs" columns to the driver and pair them up row by row,
// yielding ((header, exception details), cause) per log entry.
// NOTE(review): the pairing is purely positional and zip stops at the shortest
// sequence, so this presumably relies on every log entry contributing exactly
// one row to each of df1, df2 and df3 - confirm against the real log format.
df1.select("logs").collect.toSeq.
  zip(df2.select("logs").collect.toSeq).
  zip(df3.select("logs").collect.toSeq)
+--------------------+---+
| logs| id|
+--------------------+---+
|Caused by: java.l...| 9|
|Caused by: java.l...| 28|
+--------------------+---+
df3: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [logs: string, id: bigint]
res71: Seq[((org.apache.spark.sql.Row, org.apache.spark.sql.Row), org.apache.spark.sql.Row)] = ArrayBuffer((([2018-10-15 05:24:00.102 ],[ StoreId: 123, Co Patient Id: 123456789, Rx Number: 12345678]),[Caused by: java.lang.IllegalArgumentException: Invalid Dispense Object because compound: null and pack: null were missing.]), (([2018-10-15 05:24:25.136 ],[ StoreId: 234, Co Patient Id: 999999, Rx Number: 45555]),[Caused by: java.lang.NullPointerException: null]))