I am trying to evaluate my model with the binary classification evaluator, but I keep getting the error above even though the "rawPrediction" column really does exist.
I am building an NLP model with logistic regression; the fit and transform steps went fine, but the evaluation is the problem.
I also tried switching the evaluator to the multiclass and regression evaluators, even though this is a classification problem, and the same error persists.
# creating a bag-of-words model
from pyspark.ml.feature import CountVectorizer, StringIndexer
from pyspark.ml.classification import LogisticRegression
from pyspark.ml import Pipeline
cv = CountVectorizer(inputCol = 'bagofwords', outputCol='vectors') #features
si = StringIndexer(inputCol = 'Sentiment', outputCol = 'label') #labels
lr = LogisticRegression(featuresCol = 'vectors', labelCol = 'label', maxIter = 10, regParam = 0.3, elasticNetParam = 0.8)
pipeline = Pipeline(stages = [cv, si, lr])
model = pipeline.fit(df_train)         # fit CountVectorizer, StringIndexer and LogisticRegression on the training data
prediction = model.transform(df_test)  # appends the vectors, label, rawPrediction, probability and prediction columns
prediction.take(1)
[Row(Sentiment='negative', text='!!!!!!TO AVOID!!!!!\n\nFirst of all, as we enter, the host was very rude (and seemed not to like his job). We get seated, and then my husband had to GET UP to ask for menus and water. After getting it, we ask for the 2 for 22$ menu, we were told the boss at this location was not offering it anymore. The 2 for 22$ being the main appeal of the restaurant, we got up and left to go to the Sherbrooke location. \n\n\nI have been to the sherbrooke location before and still now i have nothing to complain about. A1 service.\n\nWhat a shame for the Madisons restaurant line.', bagofwords=['main', 'still', 'seated', 'what', 'rude', 'not', 'nothing', 'then', 'location', 'got', 'sherbrooke', 'get', 'appeal', 'as', 'enter', 'offering', 'left', 'first', 'madisons', 'been', 'husband', 'told', 'the', 'sherbrooke', 'ask', 'get', 'to', 'being', 'go', 'after', 'anymore', 'a', 'very', 'like', 'menus', 'menu', 'getting', 'service', 'line', 'restaurant', 'avoid', 'complain', 'job', 'host', 'seemed', 'shame', 'boss', 'up', 'water'], vectors=SparseVector(191243, {0: 1.0, 1: 1.0, 7: 1.0, 9: 1.0, 12: 1.0, 13: 2.0, 14: 1.0, 25: 1.0, 31: 1.0, 40: 1.0, 65: 1.0, 78: 1.0, 81: 1.0, 87: 1.0, 98: 1.0, 105: 1.0, 120: 1.0, 125: 1.0, 146: 1.0, 187: 1.0, 193: 1.0, 209: 1.0, 215: 1.0, 218: 1.0, 228: 1.0, 282: 1.0, 283: 1.0, 308: 1.0, 317: 1.0, 330: 1.0, 420: 1.0, 422: 1.0, 463: 1.0, 503: 1.0, 612: 1.0, 876: 1.0, 1202: 1.0, 1277: 1.0, 1584: 1.0, 1588: 1.0, 1602: 1.0, 1714: 1.0, 1968: 1.0, 3140: 1.0, 4329: 1.0, 19856: 2.0, 51462: 1.0}), label=1.0, rawPrediction=DenseVector([1.0612, -1.0612]), probability=DenseVector([0.7429, 0.2571]), prediction=0.0)]
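As a quick sanity check (a sketch, using the variable names above), the schema of the transformed DataFrame confirms that rawPrediction really is there after transform():

prediction.printSchema()  # lists the columns added by transform(): vectors, label, rawPrediction, probability, prediction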
from pyspark.ml.evaluation import BinaryClassificationEvaluator
assess = BinaryClassificationEvaluator().setMetricName('areaUnderROC').setLabelCol('label').setRawPredictionCol('rawPrediction')
assess.evaluate(df_test)
I expect to get a value or metric that indicates the accuracy or something similar, but I keep getting this error:
IllegalArgumentException Traceback (most recent call last)
<ipython-input-41-d1448386660e> in <module>()
5 #eval = RegressionEvaluator().setMetricName('rmse').setLabelCol('label').setPredictionCol('prediction')
6
----> 7 assess.evaluate(df_test)
/usr/local/src/spark21master/spark/python/pyspark/ml/evaluation.py in evaluate(self, dataset, params)
67 return self.copy(params)._evaluate(dataset)
68 else:
---> 69 return self._evaluate(dataset)
70 else:
71 raise ValueError("Params must be a param map but got %s." % type(params))
/usr/local/src/spark21master/spark/python/pyspark/ml/evaluation.py in _evaluate(self, dataset)
97 """
98 self._transfer_params_to_java()
---> 99 return self._java_obj.evaluate(dataset._jdf)
100
101 def isLargerBetter(self):
/usr/local/src/spark21master/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:
/usr/local/src/spark21master/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
77 raise QueryExecutionException(s.split(': ', 1)[1], stackTrace)
78 if s.startswith('java.lang.IllegalArgumentException: '):
---> 79 raise IllegalArgumentException(s.split(': ', 1)[1], stackTrace)
80 raise
81 return deco
IllegalArgumentException: 'Field "rawPrediction" does not exist.'
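For completeness, the multiclass and regression evaluator attempts mentioned above looked roughly like this (a reconstruction; the multiclass metric name is my guess, the 'rmse' one matches the commented-out line in the traceback), and they raise the same kind of IllegalArgumentException about a missing field:

from pyspark.ml.evaluation import MulticlassClassificationEvaluator, RegressionEvaluator

# multiclass attempt (metric name assumed)
mc_eval = MulticlassClassificationEvaluator(metricName='f1', labelCol='label', predictionCol='prediction')
mc_eval.evaluate(df_test)

# regression attempt, as in the commented-out line visible in the traceback
reg_eval = RegressionEvaluator(metricName='rmse', labelCol='label', predictionCol='prediction')
reg_eval.evaluate(df_test)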