Я столкнулся с проблемой при попытке запустить f1_score
и jaccard_similarity_score
:
TypeError: '<' not supported between instances of 'str' and 'int'
мой код:
from sklearn.metrics import jaccard_similarity_score
from sklearn.metrics import f1_score
from sklearn.metrics import log_loss
test_df = pd.read_csv('loan_test.csv')
test_df['due_date'] = pd.to_datetime(test_df['due_date'])
test_df['effective_date'] = pd.to_datetime(test_df['effective_date'])
test_df['dayofweek'] = test_df['effective_date'].dt.dayofweek
test_df['weekend'] = test_df['dayofweek'].apply(lambda x: 1 if (x>3) else 0)
test_df['Gender'].replace(to_replace=['male','female'], value=[0,1],inplace=True)
Feature_test = test_df[['Principal','terms','age','Gender','weekend']]
Feature_test = pd.concat([Feature_test,pd.get_dummies(test_df['education'])], axis=1)
Feature_test.drop(['Master or Above'], axis = 1,inplace=True)
Feature_test.head()
X_testset=Feature_test
y_testset=pd.get_dummies(test_df['loan_status'])['PAIDOFF'].values
y_testset
out: array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)
yhat_knn = neigh.predict(X_testset)
yhat_tree = deciTree.predict(X_testset)
yhat_svm = clf.predict(X_testset)
yhat_LR = LR.predict(X_testset)
y_pred_lr_proba=LR.predict_proba(X_testset)
yhat_knn
out: array(['PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF'],
dtype=object)
yhat_tree
out: array(['PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF'],
dtype=object)
yhat_svm
out: array(['PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF',
'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF', 'PAIDOFF'],
dtype=object)
yhat_LR
out: array(['COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
'COLLECTION', 'COLLECTION', 'COLLECTION', 'COLLECTION',
'COLLECTION', 'COLLECTION'], dtype=object)
print(f1_score(y_testset,yhat_knn)
print(f1_score(y_testset,yhat_tree))
print(f1_score(y_testset,yhat_svm))
print(f1_score(y_testset,yhat_LR))
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-100-7adec8f787e3> in <module>
4 #print(f1_score(y_testset,yhat_LR))
5
----> 6 f1_score(y_testset,yhat_knn)
~/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_classification.py in f1_score(y_true, y_pred, labels, pos_label, average, sample_weight, zero_division)
1097 pos_label=pos_label, average=average,
1098 sample_weight=sample_weight,
-> 1099 zero_division=zero_division)
1100
1101
~/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_classification.py in fbeta_score(y_true, y_pred, beta, labels, pos_label, average, sample_weight, zero_division)
1224 warn_for=('f-score',),
1225 sample_weight=sample_weight,
-> 1226 zero_division=zero_division)
1227 return f
1228
~/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_classification.py in precision_recall_fscore_support(y_true, y_pred, beta, labels, pos_label, average, warn_for, sample_weight, zero_division)
1482 raise ValueError("beta should be >=0 in the F-beta score")
1483 labels = _check_set_wise_labels(y_true, y_pred, average, labels,
-> 1484 pos_label)
1485
1486 # Calculate tp_sum, pred_sum, true_sum ###
~/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_classification.py in _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
1299 str(average_options))
1300
-> 1301 y_type, y_true, y_pred = _check_targets(y_true, y_pred)
1302 present_labels = unique_labels(y_true, y_pred)
1303 if average == 'binary':
~/anaconda3/lib/python3.7/site-packages/sklearn/metrics/_classification.py in _check_targets(y_true, y_pred)
101 y_pred = column_or_1d(y_pred)
102 if y_type == "binary":
--> 103 unique_values = np.union1d(y_true, y_pred)
104 if len(unique_values) > 2:
105 y_type = "multiclass"
<__array_function__ internals> in union1d(*args, **kwargs)
~/anaconda3/lib/python3.7/site-packages/numpy/lib/arraysetops.py in union1d(ar1, ar2)
735 array([1, 2, 3, 4, 6])
736 """
--> 737 return unique(np.concatenate((ar1, ar2), axis=None))
738
739
<__array_function__ internals> in unique(*args, **kwargs)
~/anaconda3/lib/python3.7/site-packages/numpy/lib/arraysetops.py in unique(ar, return_index, return_inverse, return_counts, axis)
260 ar = np.asanyarray(ar)
261 if axis is None:
--> 262 ret = _unique1d(ar, return_index, return_inverse, return_counts)
263 return _unpack_tuple(ret)
264
~/anaconda3/lib/python3.7/site-packages/numpy/lib/arraysetops.py in _unique1d(ar, return_index, return_inverse, return_counts)
308 aux = ar[perm]
309 else:
--> 310 ar.sort()
311 aux = ar
312 mask = np.empty(aux.shape, dtype=np.bool_)
TypeError: '<' not supported between instances of 'str' and 'int'
Для справки, я ' Мы также включили соответствующие значения ниже:
neigh = KNeighborsClassifier(n_neighbors = n).fit(X_train,y_train)
deciTree = DecisionTreeClassifier(criterion="entropy", max_depth=4)
clf = svm.SVC(kernel='rbf', gamma='scale
LR = LogisticRegression(C=0.01, solver='liblinear').fit(X_train, y_train)
Та же проблема возникает, когда я запускаю jaccard_sdentifity_score.