После StringIndexer, OneHotEncoder и VectorAssembler мои обучающие данные выглядят так:
+--------------------+----------+
| features|classIndex|
+--------------------+----------+
|(12,[0,1,2,3,4,8]...| 945.0|
|(12,[0,1,2,3,4,10...| 468.0|
|(12,[0,1,2,3,4,8]...| 2125.0|
|(12,[0,1,2,3,4,8]...| 1231.0|
|(12,[1,2,3,4,8],[...| 1754.0|
|(12,[0,1,2,3,4,8]...| 3790.0|
|(12,[0,1,2,3,4,8]...| 1714.0|
|(12,[0,1,2,3,4,8]...| 1046.0|
|(12,[0,1,2,3,4,8]...| 8145.0|
|(12,[0,1,2,3,4,8]...| 6643.0|
|(12,[0,1,2,3,4,8]...| 1729.0|
|(12,[0,1,2,3,4,6]...| 4211.0|
|(12,[1,2,3,4,6],[...| 444.0|
|(12,[0,1,2,3,4,6]...| 444.0|
|(12,[0,1,2,3,4,6]...| 3948.0|
|(12,[0,1,2,3,4,6]...| 6784.0|
.....
Мои параметры xgboost:
// Settings handed to the XGBoostClassifier constructor as a name -> value map.
// The values mix Float, Int and String, hence the explicit Map[String, Any].
// NOTE(review): with the "multi:softprob" objective XGBoost is documented to
// expect labels in [0, num_class) — confirm that every classIndex value is
// below 8249.
val xgbParam: Map[String, Any] = Map(
  "objective" -> "multi:softprob",
  "num_class" -> 8249,
  "eta" -> 0.1f,
  "max_depth" -> 2,
  "num_round" -> 100,
  "num_workers" -> 2
)

// Classifier that reads its input vectors from the "features" column and its
// labels from the "classIndex" column.
val xgbClassifier = new XGBoostClassifier(xgbParam).setFeaturesCol("features").setLabelCol("classIndex")
Когда я вызываю xgbClassifier.fit(xgbInput), я получаю:
Exception in thread "main" ml.dmlc.xgboost4j.java.XGBoostError: XGBoostModel training failed
Я действительно не знаю, что пошло не так. Полные логи ниже — кто-нибудь знает, в чём причина?
Exception in thread "main" ml.dmlc.xgboost4j.java.XGBoostError: XGBoostModel training failed
at ml.dmlc.xgboost4j.scala.spark.XGBoost$.ml$dmlc$xgboost4j$scala$spark$XGBoost$$postTrackerReturnProcessing(XGBoost.scala:511)
at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$trainDistributed$1.apply(XGBoost.scala:404)
at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$trainDistributed$1.apply(XGBoost.scala:381)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.immutable.List.foreach(List.scala:381)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.immutable.List.map(List.scala:285)
at ml.dmlc.xgboost4j.scala.spark.XGBoost$.trainDistributed(XGBoost.scala:380)
at ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier.train(XGBoostClassifier.scala:196)
at ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier.train(XGBoostClassifier.scala:48)
at org.apache.spark.ml.Predictor.fit(Predictor.scala:118)
at com.tencent.mig.profile.basicprofile.UserProfile.Test_Code$.main(Test_Code.scala:143)
at com.tencent.mig.profile.basicprofile.UserProfile.Test_Code.main(Test_Code.scala)
19/06/08 18:26:40 ERROR RabitTracker: Uncaught exception thrown by worker:
java.lang.InterruptedException
at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:998)
at java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1304)
at scala.concurrent.impl.Promise$DefaultPromise.tryAwait(Promise.scala:202)
at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:218)
at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:153)
at org.apache.spark.util.ThreadUtils$.awaitReady(ThreadUtils.scala:222)
at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:610)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2029)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2050)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2069)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:2094)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:934)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1.apply(RDD.scala:932)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.foreachPartition(RDD.scala:932)
at ml.dmlc.xgboost4j.scala.spark.XGBoost$$anonfun$trainDistributed$1$$anon$1.run(XGBoost.scala:397)
19/06/08 18:26:45 INFO SparkContext: Invoking stop() from shutdown hook