Ошибка: «<» не поддерживается между экземплярами «str» и «int» при выполнении f1_score - PullRequest
0 голосов
/ 27 марта 2020

Я столкнулся с проблемой при попытке запустить f1_score и jaccard_similarity_score:

TypeError: '<' not supported between instances of 'str' and 'int'

мой код:

from sklearn.metrics import jaccard_similarity_score
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss

test_df = pd.read_csv('loan_test.csv')

test_df['due_date'] = pd.to_datetime(test_df['due_date'])
test_df['effective_date'] = pd.to_datetime(test_df['effective_date'])
test_df['dayofweek'] = test_df['effective_date'].dt.dayofweek
test_df['weekend'] = test_df['dayofweek'].apply(lambda x: 1 if (x>3)  else 0)
test_df['Gender'].replace(to_replace=['male','female'], value=[0,1],inplace=True)
Feature_test = test_df[['Principal','terms','age','Gender','weekend']]
Feature_test = pd.concat([Feature_test,pd.get_dummies(test_df['education'])], axis=1)
Feature_test.drop(['Master or Above'], axis = 1,inplace=True)

Feature_test.head()

X_testset=Feature_test
y_testset=pd.get_dummies(test_df['loan_status'])['PAIDOFF'].values
y_testset

out: array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)

yhat_knn = neigh.predict(X_testset)
yhat_tree = deciTree.predict(X_testset)
yhat_svm = clf.predict(X_testset)
yhat_LR = LR.predict(X_testset)
y_pred_lr_proba=LR.predict_proba(X_testset)

yhat_knn
out: array(['PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF'],
  dtype=object)

yhat_tree
out: array(['PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF'],
  dtype=object)

yhat_svm
out: array(['PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF'],
  dtype=object)

yhat_LR
out: array(['COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
   'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
   'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
   'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
   'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
   'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
   'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
   'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
   'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
   'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
   'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
   'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
   'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
   'COLLECTION', 'COLLECTION'], dtype=object)

print(f1_score(y_testset,yhat_knn)
print(f1_score(y_testset,yhat_tree))
print(f1_score(y_testset,yhat_svm))
print(f1_score(y_testset,yhat_LR))

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-100-7adec8f787e3> in <module>
      4 #print(f1_score(y_testset,yhat_LR))
      5 
----> 6 f1_score(y_testset,yhat_knn)

~/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_classification.py in f1_score(y_true, y_pred, labels, pos_label, average, sample_weight, zero_division)
   1097                        pos_label=pos_label, average=average,
   1098                        sample_weight=sample_weight,
-> 1099                        zero_division=zero_division)
   1100 
   1101 

~/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_classification.py in fbeta_score(y_true, y_pred, beta, labels, pos_label, average, sample_weight, zero_division)
   1224                                                  warn_for=('f-score',),
   1225                                                  sample_weight=sample_weight,
-> 1226                                                  zero_division=zero_division)
   1227     return f
   1228 

~/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_classification.py in precision_recall_fscore_support(y_true, y_pred, beta, labels, pos_label, average, warn_for, sample_weight, zero_division)
   1482         raise ValueError("beta should be >=0 in the F-beta score")
   1483     labels = _check_set_wise_labels(y_true, y_pred, average, labels,
-> 1484                                     pos_label)
   1485 
   1486     # Calculate tp_sum, pred_sum, true_sum ###

~/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_classification.py in _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
   1299                          str(average_options))
   1300 
-> 1301     y_type, y_true, y_pred = _check_targets(y_true, y_pred)
   1302     present_labels = unique_labels(y_true, y_pred)
   1303     if average == 'binary':

~/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_classification.py in _check_targets(y_true, y_pred)
    101         y_pred = column_or_1d(y_pred)
    102         if y_type == "binary":
--> 103             unique_values = np.union1d(y_true, y_pred)
    104             if len(unique_values) > 2:
    105                 y_type = "multiclass"

<__array_function__ internals> in union1d(*args, **kwargs)

~/anaconda3/lib/python3.7/site-packages/numpy/lib/arraysetops.py in union1d(ar1, ar2)
    735     array([1, 2, 3, 4, 6])
    736     """
--> 737     return unique(np.concatenate((ar1, ar2), axis=None))
    738 
    739 

<__array_function__ internals> in unique(*args, **kwargs)

~/anaconda3/lib/python3.7/site-packages/numpy/lib/arraysetops.py in unique(ar, return_index, return_inverse, return_counts, axis)
    260     ar = np.asanyarray(ar)
    261     if axis is None:
--> 262         ret = _unique1d(ar, return_index, return_inverse, return_counts)
    263         return _unpack_tuple(ret)
    264 

~/anaconda3/lib/python3.7/site-packages/numpy/lib/arraysetops.py in _unique1d(ar, return_index, return_inverse, return_counts)
    308         aux = ar[perm]
    309     else:
--> 310         ar.sort()
    311         aux = ar
    312     mask = np.empty(aux.shape, dtype=np.bool_)

TypeError: '<' not supported between instances of 'str' and 'int'

Для справки, я также привёл соответствующие значения ниже:

neigh = KNeighborsClassifier(n_neighbors = n).fit(X_train,y_train)
deciTree = DecisionTreeClassifier(criterion="entropy", max_depth=4)
clf = svm.SVC(kernel='rbf', gamma='scale
LR = LogisticRegression(C=0.01, solver='liblinear').fit(X_train, y_train)

Та же проблема возникает, когда я запускаю jaccard_similarity_score.

1 Ответ

0 голосов
/ 27 марта 2020

Странно, как у вас получилась переменная y_test в форме, отличной от формы соответствующей переменной y_train, которую вы использовали для обучения своих моделей (и которая здесь не показана).

В любом случае, вы можете легко преобразовать ваш y_test в формат, совместимый с вашими переменными yhat, с помощью простого спискового включения (list comprehension), а затем вычислить необходимые метрики:

import numpy as np
from sklearn.metrics import f1_score

y_testset = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) # as given

yhat = ['PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
   'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF'] # as given

y_test = ['PAIDOFF' if x==1 else 'COLLECTION' for x in y_testset]

f1_score(y_test, yhat, pos_label='PAIDOFF')
# 0.851063829787234

В будущем имейте в виду, что оба аргумента ваших метрик (y_test и y_hat) должны быть в одной и той же форме, т.е. либо как 0/1, либо как 'PAIDOFF'/'COLLECTION'. Если не как 0/1, вам может потребоваться указать, какая метка является положительной (1), как я сделал выше с помощью аргумента pos_label='PAIDOFF'.

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...