У меня есть следующие две переменные:
var rddPair1 : Array[(String, String)] = Array((0000003,杉山______ 26 F),
(0000005,崎村______ 50 F), (0000007,梶川______ 42 F))
и
var rddPair2 : Array[(String, String)] = Array((0000005,82 79 16 21 80),
(0000001,46 39 8 5 21), (0000004,58 71 20 10 6), (0000009,60 89 33 18 6),
(0000003,30 50 71 36 30), (0000007,50 2 33 15 62))
Приведённый ниже код выполняет полное внешнее соединение (full outer join) этих двух переменных:
var emp = rddPair1.first._2.replaceAll("\\S", "*") //emp:String = ***** ** *
rddPair1.fullOuterJoin(rddPair2).map {
case (id, (left, right)) =>
(id,left.getOrElse(emp)+" "+ right)
}.collect()
При выполнении этого кода я получаю следующую ошибку:
org.apache.spark.SparkException: Task not serializable
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:403)
at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:393)
at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:162)
at org.apache.spark.SparkContext.clean(SparkContext.scala:2326)
at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:371)
at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:370)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
at org.apache.spark.rdd.RDD.map(RDD.scala:370)
... 56 elided
Caused by: java.io.NotSerializableException: org.apache.spark.SparkContext
Serialization stack:
- object not serializable (class: org.apache.spark.SparkContext, value: org.apache.spark.SparkContext@4d87e7f3)
- field (class: $iw, name: spark, type: class org.apache.spark.SparkContext)
- object (class $iw, $iw@65af4162)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@7da837af)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@54d0724f)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@389ae8f1)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@55ecf961)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@428c9250)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@d931617)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@2625c1cc)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@1231e446)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@27dbe9a3)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@63ad2a0f)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@203f41d7)
- field (class: $line19.$read, name: $iw, type: class $iw)
- object (class $line19.$read, $line19.$read@46a9af36)
- field (class: $iw, name: $line19$read, type: class $line19.$read)
- object (class $iw, $iw@19d118d5)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@5dac488d)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@1bba5848)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@4f1a6259)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@25712d03)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@750c242e)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@ad038f8)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@4ba64e36)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@223f8c82)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@ba1f5d1)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@4355f7b6)
- field (class: $line22.$read, name: $iw, type: class $iw)
- object (class $line22.$read, $line22.$read@44535df8)
- field (class: $iw, name: $line22$read, type: class $line22.$read)
- object (class $iw, $iw@32e14e55)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@5a78e7e3)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@28736857)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@16be6b36)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@211e1b51)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@1cce2194)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@7b31281b)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@63c9017b)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@e343477)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@3a182eaf)
- field (class: $iw, name: $iw, type: class $iw)
- object (class $iw, $iw@131af11d)
- field (class: $line24.$read, name: $iw, type: class $iw)
- object (class $line24.$read, $line24.$read@7cb39309)
- field (class: $iw, name: $line24$read, type: class $line24.$read)
- object (class $iw, $iw@282afe91)
- field (class: $iw, name: $outer, type: class $iw)
- object (class $iw, $iw@33592b53)
- field (class: $anonfun$1, name: $outer, type: class $iw)
- object (class $anonfun$1, <function1>)
at org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:40)
at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:46)
at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:100)
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:400)
... 65 more
А вот результат, который я хочу получить:
0000001 ********** ** * 46 39 8 5 21
0000004 ********** ** * 58 71 20 10 6
0000009 ********** ** * 60 89 33 18 6
0000003 杉山______ 26 F 30 50 71 36 30
0000007 梶川______ 42 F 50 2 33 15 62