Я пытался выразиться проще:
Определение функции:
scala> import org.apache.spark.sql.Row
import org.apache.spark.sql.Row
scala> // Returns true when at least one field of the row equals `search`; despite its name it does not test for null.
scala> def anyNull(itm: Row, search: String): Boolean = itm.toSeq.contains(search)
anyNull: (itm: org.apache.spark.sql.Row, search: String)Boolean
Использование для RDD[Row]:
scala> val rdd1 = sc.parallelize(Seq(Row("1","a","A"),Row("2","b", "B"),Row("3","c","C")))
rdd1: org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = ParallelCollectionRDD[130] at parallelize at <console>:25
scala> rdd1.foreach(println)
[1,a,A]
[2,b,B]
[3,c,C]
scala> rdd1.map(r => (r, anyNull(r,"b"))).foreach(println)
([2,b,B],true)
([3,c,C],false)
([1,a,A],false)
scala> rdd1.map(r => (r, anyNull(r,"a"))).foreach(println)
([1,a,A],true)
([3,c,C],false)
([2,b,B],false)
scala> rdd1.map(r => (r, anyNull(r,""))).foreach(println)
([3,c,C],false)
([2,b,B],false)
([1,a,A],false)
Использование для DF:
scala> val df = sc.parallelize(Seq(("1","a","A"),("2","b", "B"),("3","c","C"))).toDF("num", "smallcase", "uppercase")
df: org.apache.spark.sql.DataFrame = [num: string, smallcase: string ... 1 more field]
scala> df.show()
+---+---------+---------+
|num|smallcase|uppercase|
+---+---------+---------+
| 1| a| A|
| 2| b| B|
| 3| c| C|
+---+---------+---------+
scala> df.rdd.map(r => Row(r(0), r(1), r(2))).map(r => (r, anyNull(r,"b"))).foreach(println)
([2,b,B],true)
([1,a,A],false)
([3,c,C],false)
scala> df.rdd.map(r => Row(r(0), r(1), r(2))).map(r => (r, anyNull(r,""))).foreach(println)
([2,b,B],false)
([1,a,A],false)
([3,c,C],false)