Я хочу установить порог моей логистической регрессии равным 0,5 и получить точность (precision), полноту (recall) и F1-меру для этого значения в модели конвейера (Pipeline). Но
model.setThreshold (0.5)
выдаёт ошибку:
value setThreshold is not a member of org.apache.spark.ml.PipelineModel
// Split the data 80/20 into a training and a test set (fixed seed) and cache
// both parts.
val Array(train, test) = dataset
  .randomSplit(Array(0.8, 0.2), seed = 1234L)
  .map(_.cache())

// Assemble the input columns into a single vector column named "feature".
// NOTE(review): "label" is one of the assembled inputs and is also the label
// column of the classifier below, so the target is part of the features.
// Confirm this is intended.
val assembler = new VectorAssembler()
  .setInputCols(Array("label", "id", "features"))
  .setOutputCol("feature")

// Reduce the assembled vector to 2 principal components.
val pca = new PCA()
  .setInputCol("feature")
  .setK(2)
  .setOutputCol("pcaFeatures")

// The decision threshold is a parameter of the LogisticRegression stage, not
// of Pipeline / PipelineModel, so it is configured here before fitting.
// (On an already fitted pipeline the logistic regression stage can be reached
// through `model.stages`.)
val classifier = new LogisticRegression()
  .setFeaturesCol("pcaFeatures")
  .setLabelCol("label")
  .setMaxIter(10)
  .setRegParam(0.3)
  .setElasticNetParam(0.8)
  .setThreshold(0.5)

// Chain the three stages, fit on the training set and score the test set.
val pipeline = new Pipeline().setStages(Array(assembler, pca, classifier))
val model = pipeline.fit(train)
val predicted = model.transform(test)
predicted.show()
import org.apache.spark.ml.linalg.DenseVector
import org.apache.spark.sql.Row
// Keep only the rows whose label is 1, then pair each label with the first
// component of that row's "probability" vector.
// NOTE(review): index 0 is presumably the class-0 probability; confirm
// whether index 1 (the positive class) was intended.
val positives = predicted.filter(row => row.getAs[Int]("label") == 1)
val predictions = positives.map { row =>
  val label = row.getAs[Int]("label")
  val probabilities = row.getAs[DenseVector]("probability")
  (label, probabilities(0))
}
predictions.show()
// BinaryClassificationMetrics is the class used below; MulticlassMetrics is
// kept from the original import.
import org.apache.spark.mllib.evaluation.{BinaryClassificationMetrics, MulticlassMetrics}

// BinaryClassificationMetrics expects (score, label) pairs, in that order, so
// the prediction goes first and the label second.
val predictionAndLabels = predicted
  .select($"prediction", $"label".cast("double"))
  .as[(Double, Double)]
  .rdd

val metrics = new BinaryClassificationMetrics(predictionAndLabels)

// Precision for every distinct score value. The result is collected to the
// driver before printing: RDD.foreach(println) prints on the executors, so on
// a cluster the lines would not appear in the driver output.
val precision = metrics.precisionByThreshold()
precision.collect().foreach { case (t, p) =>
  println(s"Threshold is: $t, Precision is: $p")
}

// Recall for every distinct score value, printed the same way.
val recall = metrics.recallByThreshold()
recall.collect().foreach { case (t, r) =>
  println(s"Threshold is: $t,recall is: $r")
}
+---+-------------------+-----+--------------------+--------------------+--------------------+--------------------+----------+
| id| features|label| feature| pcaFeatures| rawPrediction| probability|prediction|
+---+-------------------+-----+--------------------+--------------------+--------------------+--------------------+----------+
| 3|0.03731932516607228| 1|[1.0,3.0,0.037319...|[-3.0000000581646...|[-0.8840273374633...|[0.29234391132806...| 1.0|
| 7| 0.9636476860201426| 1|[1.0,7.0,0.963647...|[-7.0000000960209...|[-0.8831455606697...|[0.29252636578097...| 1.0|
| 8| 0.4766320058073684| 0|[0.0,8.0,0.476632...|[-8.0000000194785...|[0.87801311177017...|[0.70641031990863...| 0.0|
| 45| 0.1474318959104205| 1|[1.0,45.0,0.14743...|[-45.000000062664...|[-0.8839183791391...|[0.29236645302163...| 1.0|
|103| 0.3443839885873453| 1|[1.0,103.0,0.3443...|[-103.00000007071...|[-0.8837251994055...|[0.29240642125330...| 1.0|
Как установить пороговое значение для моей модели логистической регрессии (LogisticRegression) внутри конвейера (Pipeline)?