Я обучаю модель Keras на наборе данных IMDB; за основу взята модель по ссылке https://www.kaggle.com/eriche523/bigram-keras-explained.
Код выглядит следующим образом:
# Load the IMDB reviews. header=0 combined with names= replaces the file's
# own header row with the column names 'comment' and 'label'.
df = pd.read_csv(
    "../IMDB Dataset.csv",
    names=['comment', 'label'],
    header=0,
    encoding='utf-8',
)
# read_csv already returns a DataFrame; the re-wrap is kept as in the original.
df = pd.DataFrame(df)

# Replace every label with its integer category code.
df['label'] = df['label'].astype('category').cat.codes

# Rough per-review word count: number of spaces plus one.
df['total_words'] = df['comment'].str.count(' ').add(1)

# Target vector and the number of distinct label values in it.
y = df['label'].values
num_class = len(np.unique(y))
from sklearn.feature_extraction.text import CountVectorizer

# Count token occurrences over the whole corpus (CountVectorizer defaults).
vectorizer = CountVectorizer()
vect_texts = vectorizer.fit_transform(list(df['comment']))

# get_feature_names() was removed in scikit-learn 1.2. Prefer its replacement
# and fall back only on old releases that predate get_feature_names_out().
if hasattr(vectorizer, 'get_feature_names_out'):
    all_ngrams = vectorizer.get_feature_names_out().tolist()
else:
    all_ngrams = vectorizer.get_feature_names()

# Report at most the 50 most frequent entries.
num_ngrams = min(50, len(all_ngrams))

# Column sums of the document-term matrix: total count of each vocabulary entry.
all_counts = vect_texts.sum(axis=0).tolist()[0]

# Rank by descending count; ties are broken by the n-gram text, also descending.
ranked = sorted(zip(all_counts, all_ngrams), reverse=True)
all_ngrams, all_counts = zip(*[(n, c) for c, n in ranked])

ngrams = all_ngrams[:num_ngrams]
counts = all_counts[:num_ngrams]
idx = np.arange(num_ngrams)
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import SelectKBest, chi2,f_classif, mutual_info_classif

# NOTE: this rebinds the name `stopwords` from the imported NLTK corpus object
# to the list of English stop words it returns.
stopwords = stopwords.words('english')

# TF-IDF over word unigrams and bigrams, with stop words removed and
# min_df=3 / binary=True passed through to the vectorizer.
vectorizer = TfidfVectorizer(
    stop_words=stopwords,
    analyzer='word',
    ngram_range=(1, 2),
    min_df=3,
    binary=True,
)
df_bigram = vectorizer.fit_transform(df['comment'])

# Keep at most k features, scored with chi-squared against the labels.
k = 26000
selector = SelectKBest(chi2, k=min(k, df_bigram.shape[1])).fit(df_bigram, df.label)

# Reduce to the selected columns, cast to float32 and convert to a dense array.
transformed_texts = selector.transform(df_bigram).astype('float32').toarray()
# Split the raw review texts together with the features and labels. All arrays
# passed to one train_test_split call are partitioned with the same shuffled
# indices, so reviews_test[i] is the text behind X_test[i] and y_test[i].
# Without this, a row of X_test cannot be traced back to its review.
X_train, X_test, y_train, y_test, reviews_train, reviews_test = train_test_split(
    transformed_texts, y, df['comment'].values, test_size=0.3)
# Width of the input layer: the number of features kept by the selector.
max_features = min(k, df_bigram.shape[1])

# Feed-forward classifier: two 64-unit ReLU layers, each followed by dropout.
model = Sequential()
model.add(Dense(64, input_dim=max_features, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))

# The classes are mutually exclusive and the targets are one-hot encoded
# (to_categorical), so the output layer is a softmax over num_class units
# trained with categorical cross-entropy. The original used two independent
# sigmoid units with binary_crossentropy, whose outputs do not form a
# probability distribution even though predictions are decoded with argmax.
model.add(Dense(num_class, activation='softmax'))
model.compile(
    loss='categorical_crossentropy',
    optimizer='RMSprop',
    metrics=['acc'],
)
model.summary()
# Checkpoint to `filepath` only when the monitored val_acc improves.
filepath = "weights-simple.hdf5"
checkpointer = ModelCheckpoint(
    filepath,
    monitor='val_acc',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Train on one-hot targets; validation_split=0.25 holds out part of the
# training data for validation.
history = model.fit(
    x=[X_train],
    y=to_categorical(y_train),
    batch_size=64,
    epochs=15,
    shuffle=True,
    validation_split=0.25,
    callbacks=[checkpointer],
    verbose=1,
)
# Model outputs for every held-out sample.
predicted = model.predict(X_test)

# Predicted class = index of the largest output in each row.
predicted_best = np.argmax(predicted, axis=1)

# Accuracy of the decoded predictions against the held-out labels.
test_accuracy = accuracy_score(predicted_best, y_test)
print(test_accuracy)

# Re-wrap the raw model outputs as a DataFrame.
predicted = pd.DataFrame(data=predicted)
Я хочу видеть каждый отзыв из X_test вместе с предсказанной для него меткой. Ниже приведён код, который я для этого написал, но мне не удаётся получить текст отзыва, для которого делается предсказание:
# Pick one random row of the test set.
index = np.random.randint(X_test.shape[0])

# Add a leading batch axis so the single sample can be passed to predict().
# The original indexed X_test with `label`, which is not yet defined at this
# point (NameError); the randomly drawn `index` is the intended subscript.
label = np.expand_dims(X_test[index], 0)

predictions = model.predict(label)
# Predicted class = position of the largest output for this sample.
pred_label = np.argmax(predictions[0])

# NOTE: X_test holds only feature vectors. The text of the review at `index`
# can be looked up only if the raw comments were passed through the same
# train_test_split call as the features, so that their rows stay aligned.
Пожалуйста, помогите мне с этим. Заранее спасибо!