Keras multi-class classification error metrics
25 January 2019

I'm trying to get an F1 score to work correctly for a multi-class classification problem.

I get: ValueError: Classification metrics can't handle a mix of multilabel-indicator and binary targets

Preprocessing of the target variable:

from sklearn import preprocessing
from keras.utils import np_utils

encoder = preprocessing.LabelEncoder()
encoded_y_train = encoder.fit_transform(y_train)          # class labels -> integers
dummy_y_train = np_utils.to_categorical(encoded_y_train)  # integers -> one-hot matrix
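For reference, a minimal sketch of what this encoding step produces (the toy labels below are hypothetical, not from the question):

from sklearn import preprocessing
from keras.utils import np_utils

# Hypothetical labels with 5 classes, mirroring the (43266, 5) target below
y_toy = ['cat', 'dog', 'bird', 'fish', 'frog', 'cat']

encoder = preprocessing.LabelEncoder()
encoded = encoder.fit_transform(y_toy)      # [1 2 0 3 4 1] (alphabetical class order)
one_hot = np_utils.to_categorical(encoded)  # one 1.0 per row

print(one_hot.shape)  # (6, 5)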

The shapes of my X_train and y_train data:

print(train_ohe.shape,dummy_y_train.shape)
# (43266, 189) (43266, 5)

Custom F1 scoring metric:

from keras import backend as K

def f1(y_true, y_pred):
    def recall(y_true, y_pred):
        """Recall metric.

        Only computes a batch-wise average of recall.

        Computes the recall, a metric for multi-label classification of
        how many relevant items are selected.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        """Precision metric.

        Only computes a batch-wise average of precision.

        Computes the precision, a metric for multi-label classification of
        how many selected items are relevant.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    return 2 * ((p * r) / (p + r + K.epsilon()))
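Assuming the f1 function above is in scope, a quick eager sanity check on a toy batch (my own example data, not from the question) shows it tracking sklearn's micro-averaged F1. Per its docstrings it is only a batch-wise approximation, which is why these metrics were dropped from Keras 2.0 core:

import numpy as np
from keras import backend as K
from sklearn.metrics import f1_score

# Toy one-hot targets and softmax-like predictions (hypothetical data)
y_true = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype='float32')
y_pred = np.array([[0.9, 0.05, 0.05], [0.1, 0.8, 0.1], [0.2, 0.2, 0.6]], dtype='float32')

print(K.eval(f1(K.constant(y_true), K.constant(y_pred))))  # ~1.0 on this batch
print(f1_score(y_true.argmax(axis=1), y_pred.argmax(axis=1), average='micro'))  # 1.0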

Here is the model architecture:

# baseline model
from keras.models import Sequential
from keras.layers import Dense

def baseline_model():
    model = Sequential()
    model.add(Dense(8, input_dim=189, activation='relu'))
    model.add(Dense(5, activation='softmax'))
    # compile with the custom f1 metric defined above
    model.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=[f1])
    return model
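A quick instantiation (my own snippet) confirms the shapes line up with the data printed earlier:

model = baseline_model()
model.summary()
print(model.output_shape)  # (None, 5), matching dummy_y_train's 5 columns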

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import KFold, cross_val_score

estimators = []
estimators.append(('mlp', KerasClassifier(build_fn=baseline_model, epochs=1, batch_size=16, verbose=1)))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10, shuffle=True, random_state=876)
results = cross_val_score(pipeline, train_ohe, dummy_y_train, cv=kfold, scoring='f1')
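For context, sklearn's string scorers such as 'f1' and 'precision' call the pipeline's predict(), which returns a 1-D array of class indices, and compare it against the y passed to cross_val_score, which here is the one-hot dummy_y_train, a multilabel-indicator matrix; _check_targets refuses that mix. A minimal sketch of one way to keep the two consistent (my own suggestion, assuming KerasClassifier's usual behavior of one-hot encoding 1-D integer targets itself when the loss is categorical_crossentropy):

from sklearn.metrics import f1_score, make_scorer

# Score against the integer labels and use a multiclass-aware average
f1_micro = make_scorer(f1_score, average='micro')
results = cross_val_score(pipeline, train_ohe, encoded_y_train,
                          cv=kfold, scoring=f1_micro)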

Here is the error I get when using the f1 metric. (The traceback below was captured with scoring='precision'; the commented-out scoring='f1' line trips the same check.)

Epoch 1/1
38939/38939 [==============================] - 3s 76us/step - loss: 15.9096 - f1: 0.0128
4327/4327 [==============================] - 1s 150us/step
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-79-ef9e0f41eb14> in <module>
     15 kfold = KFold(n_splits=10, shuffle=True, random_state=876)
     16 # results = cross_val_score(pipeline, train_ohe, dummy_y_train, cv=kfold, scoring='f1')
---> 17 results = cross_val_score(pipeline, train_ohe, dummy_y_train, cv=kfold, scoring='precision')
     18 
     19 

/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)
    400                                 fit_params=fit_params,
    401                                 pre_dispatch=pre_dispatch,
--> 402                                 error_score=error_score)
    403     return cv_results['test_score']
    404 

/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
    238             return_times=True, return_estimator=return_estimator,
    239             error_score=error_score)
--> 240         for train, test in cv.split(X, y, groups))
    241 
    242     zipped_scores = list(zip(*scores))

/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
    915             # remaining jobs.
    916             self._iterating = False
--> 917             if self.dispatch_one_batch(iterator):
    918                 self._iterating = self._original_iterator is not None
    919 

/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
    757                 return False
    758             else:
--> 759                 self._dispatch(tasks)
    760                 return True
    761 

/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
    714         with self._lock:
    715             job_idx = len(self._jobs)
--> 716             job = self._backend.apply_async(batch, callback=cb)
    717             # A job can complete so quickly than its callback is
    718             # called before we get here, causing self._jobs to

/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
    180     def apply_async(self, func, callback=None):
    181         """Schedule a func to be run"""
--> 182         result = ImmediateResult(func)
    183         if callback:
    184             callback(result)

/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
    547         # Don't delay the application, to avoid keeping the input
    548         # arguments in memory
--> 549         self.results = batch()
    550 
    551     def get(self):

/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
    566         fit_time = time.time() - start_time
    567         # _score will return dict if is_multimetric is True
--> 568         test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
    569         score_time = time.time() - start_time - fit_time
    570         if return_train_score:

/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in _score(estimator, X_test, y_test, scorer, is_multimetric)
    603     """
    604     if is_multimetric:
--> 605         return _multimetric_score(estimator, X_test, y_test, scorer)
    606     else:
    607         if y_test is None:

/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in _multimetric_score(estimator, X_test, y_test, scorers)
    633             score = scorer(estimator, X_test)
    634         else:
--> 635             score = scorer(estimator, X_test, y_test)
    636 
    637         if hasattr(score, 'item'):

/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/metrics/scorer.py in __call__(self, estimator, X, y_true, sample_weight)
     96         else:
     97             return self._sign * self._score_func(y_true, y_pred,
---> 98                                                  **self._kwargs)
     99 
    100 

/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/metrics/classification.py in precision_score(y_true, y_pred, labels, pos_label, average, sample_weight)
   1267                                                  average=average,
   1268                                                  warn_for=('precision',),
-> 1269                                                  sample_weight=sample_weight)
   1270     return p
   1271 

/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/metrics/classification.py in precision_recall_fscore_support(y_true, y_pred, beta, labels, pos_label, average, warn_for, sample_weight)
   1029         raise ValueError("beta should be >0 in the F-beta score")
   1030 
-> 1031     y_type, y_true, y_pred = _check_targets(y_true, y_pred)
   1032     check_consistent_length(y_true, y_pred, sample_weight)
   1033     present_labels = unique_labels(y_true, y_pred)

/opt/virtual_env/py3/lib/python3.6/site-packages/sklearn/metrics/classification.py in _check_targets(y_true, y_pred)
     79     if len(y_type) > 1:
     80         raise ValueError("Classification metrics can't handle a mix of {0} "
---> 81                          "and {1} targets".format(type_true, type_pred))
     82 
     83     # We can't have more than one value on y_type => The set is no more needed

ValueError: Classification metrics can't handle a mix of multilabel-indicator and binary targets
...