Я играю с этой моделью https://vgpena.github.io/classifying-tweets-with-keras-and-tensorflow/ и пытаюсь создать модель анализа настроений Twitter. Я получаю
File "C:\Users\sam\Desktop\proje\load_model.py", line 71, in <module>
pred = model.predict(top_tweets)
File "C:\Users\sam\anaconda3\lib\site-packages\keras\engine\training.py", line 1441, in predict
x, _, _ = self._standardize_user_data(x)
File "C:\Users\sam\anaconda3\lib\site-packages\keras\engine\training.py", line 579, in _standardize_user_data
exception_prefix='input')
File "C:\Users\sam\anaconda3\lib\site-packages\keras\engine\training_utils.py", line 145, in standardize_input_data
str(data_shape))
ValueError: Error when checking input: expected dense_1_input to have shape (3000,) but got array with shape (1,)
Я искал, и похоже, что есть проблема с моей моделью, но я не могу точно определить проблему
Мой код:
model.py:
import json
import keras
import keras.preprocessing.text as kpt
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
import numpy as np
import pandas as pd
training = np.genfromtxt('training.1600000.processed.noemoticon.csv', delimiter=',', skip_header=1, usecols=(0, 5), dtype=None, encoding='latin-1')
train_x = [x[1] for x in training]
train_y = np.asarray([x[0] for x in training])
max_words = 3000
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(train_x)
dictionary = tokenizer.word_index
with open('dict.json', 'w') as dictionary_file:
json.dump(dictionary, dictionary_file)
def convert_text_to_index_array(text):
return [dictionary[word] for word in kpt.text_to_word_sequence(text)]
allWordIndices = []
for text in train_x:
wordIndices = convert_text_to_index_array(text)
allWordIndices.append(wordIndices)
allWordIndices = np.asarray(allWordIndices)
train_y = tokenizer.sequences_to_matrix(allWordIndices, mode='binary')
train_x = keras.utils.to_categorical(train_x, 2)
model = Sequential()
model.add(Dense(512, input_shape=(max_words,), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(256, activation='sigmoid'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy',
optimizer='adam',
metrics=['accuracy'])
model.fit(train_x, train_y,
batch_size=32,
epochs=5,
verbose=1,
validation_split=0.1,
shuffle=True)
model_json = model.to_json()
with open('model.json', 'w') as json_file:
json_file.write(model_json)
model.save_weights('model.h5')
print('Model saved!')
load_model.py:
import json
import numpy as np
import keras.preprocessing.text as kpt
from keras.preprocessing.text import Tokenizer
from keras.models import model_from_json
from numpy.core._multiarray_umath import ndarray
import GetOldTweets3 as got
import pandas as pd
def get_tweets(username, top_only, max_tweets):
tweet_criteria = got.manager.TweetCriteria().setUsername(username) \
.setTopTweets(top_only) \
.setMaxTweets(max_tweets)
tweet = got.manager.TweetManager.getTweets(tweet_criteria)
text_tweets = [[tw.text] for tw in tweet]
top_tweets = pd.DataFrame(text_tweets)
return top_tweets
tokenizer = Tokenizer(num_words=3000)
labels = ['negative', 'pozitive']
with open('dict.json', 'r') as dictionary_file:
dictionary = json.load(dictionary_file)
def convert_to_index_array(text):
words = kpt.text_to_word_sequence(text)
wordIndices = []
for word in words:
if word in dictionary:
wordIndices.append(dictionary[word])
else:
print(("'%s' is getting ignored." % (word)))
return wordIndices
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
model = model_from_json(loaded_model_json)
model.load_weights('model.h5')
while True:
username = input("Enter an username:")
top_tweets = get_tweets(username,
top_only=True,
max_tweets=100)
evalSentence = top_tweets
if len([input]) < 1:
break
testArr = convert_to_index_array(evalSentence.to_string())
username = tokenizer.sequences_to_matrix([testArr], mode="binary")
pred = model.predict(top_tweets)
print(("Your profile %s; %f%% confidince" % (labels(np.argmax(pred)), pred[0][np.argmax(pred)] * 100)))
Спасибо