Форма ввода предсказания ГРУ - PullRequest
0 голосов
/ 19 марта 2020

Я реализовал мультиклассовый классификатор в Керасе. Модель принимает один текст и две числовые характеристики. Во время предсказания я передаю ввод с правильной формой, но все равно получаю ошибку.

Код модели

X1_trn, MAX_LEN = process_data_tc(X_train,feat_name,target_label,data_type='train')
X2_trn = X_train[['claim_count','claim_length']].values

le = LabelEncoder()
le.fit(df[target_label])
y_transformed = le.transform(df[target_label]) # doubt
y_trn_transformed = le.transform(y_train) 
y_trn_target = to_categorical(y_trn_transformed)
joblib.dump(le, os.path.join(MODELS_PATH,'label_encoder_'+str(target_label)+'_gru.pkl'), compress=1)
# y_trn_transformed, y_trn_target = get_label_encoding_2(df,TARGET_NAME,y_train)
print(X1_trn.shape, y_trn_transformed.shape, y_trn_target.shape, X2_trn.shape)

X1_tst = process_data_tc(X_test,feat_name,target_label,data_type='test')
X2_tst = X_test[['claim_count','claim_length']].values

y_tst_transformed = le.transform(y_test)
y_tst_target = to_categorical(y_tst_transformed)

# y_tst_transformed, y_tst_target = get_label_encoding_2(df,TARGET_NAME,y_test)
print(X1_tst.shape,y_tst_transformed.shape,y_tst_target.shape,X2_tst.shape)

input1 = Input(shape=(MAX_LEN,))
input2 = Input(shape=(2,))
embedding1 = Embedding(input_dim=self.vocab_size,
                       output_dim=self.embedding_dim,
                       input_length=MAX_LEN)(input1)
lstm1 = Bidirectional(LSTM(units=self.number_gru_units,
                           recurrent_dropout=self.rate_drop_lstm,
                           dropout=self.rate_drop_lstm))(embedding1)
dense1 = Dense(units=16,activation=self.activation_function)(input2)
dense2 = Dense(units=8,activation=self.activation_function)(dense1)

concat_layer = Concatenate()([lstm1,dense2])
dense3 = Dense(units=5,activation=self.activation_function)(concat_layer)
output = Dense(units=len(np.unique(y_transformed)),activation='softmax')(dense3)
model = Model(inputs=[input1,input2],output=output)
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['acc'])
model.summary()

Архитектура моей модели enter image description here

Input_5 принимает текстовые данные, а Input_6 принимает две числовые функции.

Мой код прогноза:

BEST_TC_MODEL_FILE = 'best_model_topic_complexity_path.npy'
TOKENIZER_TC_GRU_FILE = 'tokenizer_topic_complexity_gru.pkl'
LABEL_ENCODER_TC_GRU_FILE = 'label_encoder_topic_complexity_gru.pkl'
MAX_SEQUENCE_LENGTH = 300

from keras.models import model_from_json
from keras.preprocessing.sequence import pad_sequences
import joblib

tc_lbl_encodr = joblib.load(os.path.join(MODELS_PATH,LABEL_ENCODER_TC_GRU_FILE))
tc_tokenizer = joblib.load(os.path.join(MODELS_PATH,TOKENIZER_TC_GRU_FILE))

TARGET_LABEL = 'topic_complexity'

model = load_biGRU(target_label=TARGET_LABEL)
print("Loaded GRU model from disk")
model._make_predict_function()
graph = tf.get_default_graph()
print('Built Model Graph')

X1_ts = tc_tokenizer.texts_to_sequences(df['processed_claim_text'].iloc[2])
X1_ts = pad_sequences(X1_ts, maxlen=MAX_SEQUENCE_LENGTH)
X2_ts = np.array([df.iloc[2]['claim_count'],df.iloc[2]['claim_length']])
print("Input Shape ",X2_ts.shape)

prediction = model.predict([X1_ts,X2_ts])
predictions_lbls = tc_lbl_encodr.inverse_transform([np.argmax(prediction,axis=1)])[0]
print(predictions_lbls)

Вот ошибка, которую я получил

Loading subject area model ../models/topic_complexity/checkpoints/1584536417/bilstm_141_180_0.36_0.32
Loaded GRU model from disk
Built Model Graph
Input Shape  (2,)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-126-64cba922ce0c> in <module>
     25 print("Input Shape ",X2_ts.shape)
     26 
---> 27 prediction = model.predict([X1_ts,X2_ts])
     28 predictions_lbls = tc_lbl_encodr.inverse_transform([np.argmax(prediction,axis=1)])[0]
     29 print(predictions_lbls)

~/.local/lib/python3.6/site-packages/keras/engine/training.py in predict(self, x, batch_size, verbose, steps)
   1147                              'argument.')
   1148         # Validate user data.
-> 1149         x, _, _ = self._standardize_user_data(x)
   1150         if self.stateful:
   1151             if x[0].shape[0] > batch_size and x[0].shape[0] % batch_size != 0:

~/.local/lib/python3.6/site-packages/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)
    749             feed_input_shapes,
    750             check_batch_axis=False,  # Don't enforce the batch size.
--> 751             exception_prefix='input')
    752 
    753         if y is not None:

~/.local/lib/python3.6/site-packages/keras/engine/training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
    136                             ': expected ' + names[i] + ' to have shape ' +
    137                             str(shape) + ' but got array with shape ' +
--> 138                             str(data_shape))
    139     return data
    140 

ValueError: Error when checking input: expected input_6 to have shape (2,) but got array with shape (1,)
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...