Ошибка сериализации OneVsRest в MLeap + Spark ** версии: Spark 2.3, MLeap 0.13 ** - PullRequest
0 голосов
/ 08 января 2019

Я получаю следующую ошибку ( Exception in thread "main" java.util.NoSuchElementException: key not found: org.apache.spark.ml.classification.OneVsRestModel ) при попытке сериализации модели (код ниже).

import ml.combust.bundle.BundleFile
import ml.combust.bundle.serializer.SerializationFormat.Protobuf
import ml.combust.mleap.spark.SparkSupport._
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.{LogisticRegression, OneVsRest}
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature.PCA
import org.apache.spark.ml.tuning.{CrossValidator, ParamGridBuilder}
import resource.managed

import org.apache.spark.sql.SparkSession

/**
 * Example job: fits a cross-validated `PCA -> OneVsRest(LogisticRegression)`
 * pipeline on a libsvm data set and writes the fitted model to an MLeap bundle.
 */
object OneVsRestExample {

  /**
   * Entry point.
   *
   * Reads the input data, tunes the pipeline with 4-fold cross-validation and
   * saves the resulting model as a Protobuf-formatted MLeap bundle on local disk.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("OneVsRestExample")
      .getOrCreate()

    try {
      // Load the libsvm file and rename the default "features" column to "input",
      // which is the column PCA reads from below.
      val inputData = spark.read
        .format("libsvm")
        .load("/home/data/sample_multiclass_classification_data.txt")
        .withColumnRenamed("features", "input")

      // Generate the train/test split; only the training part is used here.
      val Array(train, _) = inputData.randomSplit(Array(0.8, 0.2))

      // Dimensionality reduction feeding the classifier.
      val pca = new PCA()
        .setInputCol("input")
        .setOutputCol("pcaFeatures")

      // Base binary classifier wrapped by One-vs-Rest.
      val classifier = new LogisticRegression()
        .setFeaturesCol("pcaFeatures")
        .setFitIntercept(true)

      // One-vs-Rest classifier reading the same features column as the base classifier.
      // NOTE(review): the reported failure ("key not found:
      // org.apache.spark.ml.classification.OneVsRestModel") suggests MLeap has no
      // serializer registered for this stock Spark class. MLeap's Spark extension
      // appears to ship its own OneVsRest for this purpose - confirm against the
      // MLeap documentation before switching.
      val ovr = new OneVsRest()
        .setClassifier(classifier)
        .setFeaturesCol(classifier.getFeaturesCol)

      val stages = Array(pca, ovr)
      val pipeline = new Pipeline().setStages(stages)

      // Hyper-parameter grid searched by the cross-validator.
      val paramGrid = new ParamGridBuilder()
        .addGrid(pca.k, Array(2, 3, 4))
        .addGrid(classifier.maxIter, Array(5, 10, 20, 200))
        .addGrid(classifier.tol, Array(1e-5, 1e-6))
        .build()

      val evalMetric = new MulticlassClassificationEvaluator()
        .setMetricName("weightedPrecision")

      val cv = new CrossValidator()
        .setEstimator(pipeline)
        .setEvaluator(evalMetric)
        .setEstimatorParamMaps(paramGrid)
        .setNumFolds(4)

      val model = cv.fit(train)

      // Write the fitted model to an MLeap bundle; `managed` closes the bundle file
      // once the body finishes.
      val localPathOnDisk = "/home/data/ovr_cv_mleap_model"
      for (bundle <- managed(BundleFile(s"file:$localPathOnDisk"))) {
        // `.get` forces the result so that a failed save surfaces as an exception
        // instead of being silently dropped.
        model.writeBundle.format(Protobuf).save(bundle).get
      }
    } finally {
      // Release Spark resources whether or not training/serialization succeeded.
      spark.stop()
    }
  }
}

Код падает при создании бандла модели — на последней строке фрагмента: model.writeBundle.format(Protobuf).save(bundle).get

...