Вот мой код:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve,auc,accuracy_score, precision_score, recall_score, f1_score,confusion_matrix
from keras.utils import to_categorical
def lgtcmodelquadratic(X_train, Y_train, X_test, Y_test):
    """Train and evaluate logistic regression on degree-2 polynomial features.

    The training features are expanded with ``PolynomialFeatures(degree=2)``,
    an unpenalized ``LogisticRegression`` (``saga`` solver) is fitted on the
    expanded matrix, and the model is scored on the expanded test features.

    NOTE: a degree-2 expansion of ``n`` input columns produces
    ``(n + 1) * (n + 2) / 2`` output columns, so the expanded matrix grows
    quadratically with the input width. For very wide inputs, reduce the
    number of features before calling this function.

    Args:
        X_train: Training feature matrix.
        Y_train: Training labels.
        X_test: Test feature matrix with the same number of columns as
            ``X_train``.
        Y_test: Test labels.

    Returns:
        A list ``[accuracy, precision, recall, f1]`` where precision, recall
        and F1 are weighted averages over the classes.
    """
    quadratic_featurizer = PolynomialFeatures(degree=2)
    X_train_quadratic = quadratic_featurizer.fit_transform(X_train)

    model = LogisticRegression(penalty='none', solver='saga')
    model.fit(X_train_quadratic, Y_train)

    # Reuse the featurizer fitted on the training split: the test data must
    # only be transformed, never used to re-fit a preprocessing step.
    X_test_quadratic = quadratic_featurizer.transform(X_test)
    Y_pred = model.predict(X_test_quadratic)

    accu = accuracy_score(Y_test, Y_pred)
    # pos_label is only meaningful with average='binary'; with
    # average='weighted' it is ignored, so it is not passed here.
    p = precision_score(Y_test, Y_pred, average='weighted')
    r = recall_score(Y_test, Y_pred, average='weighted')
    f1 = f1_score(Y_test, Y_pred, average='weighted')

    results = [accu, p, r, f1]
    print('The model evaluate(accu,pre,recall,f1) of Polynomial LogisticRegression Model is: \n',results)
    return results
Мои данные — векторы TF-IDF; размерности X_train, X_test, Y_train и Y_test соответственно: (20460, 231717), (5116, 231717), (20460,), (5116,).
Затем я столкнулся с ошибкой AssertionError:
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-98-d19f03adac92> in <module>
----> 1 lgtcmodelquadratic(X_train,Y_train,X_test,Y_test)
<ipython-input-97-ee7d01e08836> in lgtcmodelquadratic(X_train, Y_train, X_test, Y_test)
2 def lgtcmodelquadratic(X_train,Y_train,X_test,Y_test):
3 quadratic_featurizer = PolynomialFeatures(degree=2)
----> 4 X_train_quadratic = quadratic_featurizer.fit_transform(X_train)
5 model = LogisticRegression(penalty='none',solver = 'saga')
6 model.fit(X_train_quadratic,Y_train)
D:\anaconda3\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
551 if y is None:
552 # fit method of arity 1 (unsupervised transformation)
--> 553 return self.fit(X, **fit_params).transform(X)
554 else:
555 # fit method of arity 2 (supervised transformation)
D:\anaconda3\lib\site-packages\sklearn\preprocessing\data.py in transform(self, X)
1519 X.indptr, X.shape[1],
1520 self.interaction_only,
-> 1521 deg)
1522 if Xp_next is None:
1523 break
sklearn\preprocessing\_csr_polynomial_expansion.pyx in sklearn.preprocessing._csr_polynomial_expansion._csr_polynomial_expansion()
AssertionError:
Я не могу найти способ это исправить. К этой ошибке привела огромная размерность моих данных, или причины могут быть другими? Буду очень благодарен за помощь.