Ошибка «Task not serializable» в Scala Spark - PullRequest
0 голосов
/ 10 июня 2019

У меня есть две переменные, приведённые ниже:

var rddPair1 : Array[(String, String)] = Array((0000003,杉山______ 26 F),
    (0000005,崎村______ 50 F), (0000007,梶川______ 42 F))

и

var rddPair2 : Array[(String, String)] = Array((0000005,82 79 16 21 80),
    (0000001,46 39 8 5 21), (0000004,58 71 20 10 6), (0000009,60 89 33 18 6),
    (0000003,30 50 71 36 30), (0000007,50 2 33 15 62))

Приведённый ниже код соединяет эти две переменные с помощью полного внешнего соединения (full outer join):

var emp = rddPair1.first._2.replaceAll("\\S", "*") //emp:String = ***** ** *
rddPair1.fullOuterJoin(rddPair2).map {
  case (id, (left, right)) =>
    (id,left.getOrElse(emp)+" "+ right)
}.collect()

При этом я получаю следующую ошибку:

org.apache.spark.SparkException: Task not serializable
  at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:403)
  at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:393)
  at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:162)
  at org.apache.spark.SparkContext.clean(SparkContext.scala:2326)
  at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:371)
  at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:370)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
  at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
  at org.apache.spark.rdd.RDD.map(RDD.scala:370)
  ... 56 elided
Caused by: java.io.NotSerializableException: org.apache.spark.SparkContext
Serialization stack:
        - object not serializable (class: org.apache.spark.SparkContext, value: org.apache.spark.SparkContext@4d87e7f3)
        - field (class: $iw, name: spark, type: class org.apache.spark.SparkContext)
        - object (class $iw, $iw@65af4162)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@7da837af)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@54d0724f)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@389ae8f1)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@55ecf961)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@428c9250)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@d931617)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@2625c1cc)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@1231e446)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@27dbe9a3)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@63ad2a0f)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@203f41d7)
        - field (class: $line19.$read, name: $iw, type: class $iw)
        - object (class $line19.$read, $line19.$read@46a9af36)
        - field (class: $iw, name: $line19$read, type: class $line19.$read)
        - object (class $iw, $iw@19d118d5)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@5dac488d)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@1bba5848)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@4f1a6259)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@25712d03)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@750c242e)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@ad038f8)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@4ba64e36)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@223f8c82)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@ba1f5d1)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@4355f7b6)
        - field (class: $line22.$read, name: $iw, type: class $iw)
        - object (class $line22.$read, $line22.$read@44535df8)
        - field (class: $iw, name: $line22$read, type: class $line22.$read)
        - object (class $iw, $iw@32e14e55)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@5a78e7e3)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@28736857)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@16be6b36)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@211e1b51)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@1cce2194)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@7b31281b)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@63c9017b)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@e343477)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@3a182eaf)
        - field (class: $iw, name: $iw, type: class $iw)
        - object (class $iw, $iw@131af11d)
        - field (class: $line24.$read, name: $iw, type: class $iw)
        - object (class $line24.$read, $line24.$read@7cb39309)
        - field (class: $iw, name: $line24$read, type: class $line24.$read)
        - object (class $iw, $iw@282afe91)
        - field (class: $iw, name: $outer, type: class $iw)
        - object (class $iw, $iw@33592b53)
        - field (class: $anonfun$1, name: $outer, type: class $iw)
        - object (class $anonfun$1, <function1>)
  at org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:40)
  at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:46)
  at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:100)
  at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:400)
  ... 65 more 

А вот результат, который я хочу получить:

0000001 ********** ** * 46 39 8  5  21
0000004 ********** ** * 58 71 20 10 6
0000009 ********** ** * 60 89 33 18 6
0000003 杉山______ 26 F 30 50 71 36 30
0000007 梶川______ 42 F 50 2  33 15 62


1 Ответ

0 голосов
/ 11 июня 2019

Решение подходит для обеих сред.

Привет.

...