Я реализовал мультиклассовый классификатор в Керасе. Модель принимает один текст и две числовые характеристики. Во время предсказания я передаю ввод с правильной формой, но все равно получаю ошибку.
Код модели
X1_trn, MAX_LEN = process_data_tc(X_train,feat_name,target_label,data_type='train')
X2_trn = X_train[['claim_count','claim_length']].values
le = LabelEncoder()
le.fit(df[target_label])
y_transformed = le.transform(df[target_label]) # doubt
y_trn_transformed = le.transform(y_train)
y_trn_target = to_categorical(y_trn_transformed)
joblib.dump(le, os.path.join(MODELS_PATH,'label_encoder_'+str(target_label)+'_gru.pkl'), compress=1)
# y_trn_transformed, y_trn_target = get_label_encoding_2(df,TARGET_NAME,y_train)
print(X1_trn.shape, y_trn_transformed.shape, y_trn_target.shape, X2_trn.shape)
X1_tst = process_data_tc(X_test,feat_name,target_label,data_type='test')
X2_tst = X_test[['claim_count','claim_length']].values
y_tst_transformed = le.transform(y_test)
y_tst_target = to_categorical(y_tst_transformed)
# y_tst_transformed, y_tst_target = get_label_encoding_2(df,TARGET_NAME,y_test)
print(X1_tst.shape,y_tst_transformed.shape,y_tst_target.shape,X2_tst.shape)
input1 = Input(shape=(MAX_LEN,))
input2 = Input(shape=(2,))
embedding1 = Embedding(input_dim=self.vocab_size,
output_dim=self.embedding_dim,
input_length=MAX_LEN)(input1)
lstm1 = Bidirectional(LSTM(units=self.number_gru_units,
recurrent_dropout=self.rate_drop_lstm,
dropout=self.rate_drop_lstm))(embedding1)
dense1 = Dense(units=16,activation=self.activation_function)(input2)
dense2 = Dense(units=8,activation=self.activation_function)(dense1)
concat_layer = Concatenate()([lstm1,dense2])
dense3 = Dense(units=5,activation=self.activation_function)(concat_layer)
output = Dense(units=len(np.unique(y_transformed)),activation='softmax')(dense3)
model = Model(inputs=[input1,input2],output=output)
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['acc'])
model.summary()
Архитектура моей модели
Input_5 принимает текстовые данные, а Input_6 принимает две числовые функции.
Мой код прогноза:
BEST_TC_MODEL_FILE = 'best_model_topic_complexity_path.npy'
TOKENIZER_TC_GRU_FILE = 'tokenizer_topic_complexity_gru.pkl'
LABEL_ENCODER_TC_GRU_FILE = 'label_encoder_topic_complexity_gru.pkl'
MAX_SEQUENCE_LENGTH = 300
from keras.models import model_from_json
from keras.preprocessing.sequence import pad_sequences
import joblib
tc_lbl_encodr = joblib.load(os.path.join(MODELS_PATH,LABEL_ENCODER_TC_GRU_FILE))
tc_tokenizer = joblib.load(os.path.join(MODELS_PATH,TOKENIZER_TC_GRU_FILE))
TARGET_LABEL = 'topic_complexity'
model = load_biGRU(target_label=TARGET_LABEL)
print("Loaded GRU model from disk")
model._make_predict_function()
graph = tf.get_default_graph()
print('Built Model Graph')
X1_ts = tc_tokenizer.texts_to_sequences(df['processed_claim_text'].iloc[2])
X1_ts = pad_sequences(X1_ts, maxlen=MAX_SEQUENCE_LENGTH)
X2_ts = np.array([df.iloc[2]['claim_count'],df.iloc[2]['claim_length']])
print("Input Shape ",X2_ts.shape)
prediction = model.predict([X1_ts,X2_ts])
predictions_lbls = tc_lbl_encodr.inverse_transform([np.argmax(prediction,axis=1)])[0]
print(predictions_lbls)
Вот ошибка, которую я получил
Loading subject area model ../models/topic_complexity/checkpoints/1584536417/bilstm_141_180_0.36_0.32
Loaded GRU model from disk
Built Model Graph
Input Shape (2,)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-126-64cba922ce0c> in <module>
25 print("Input Shape ",X2_ts.shape)
26
---> 27 prediction = model.predict([X1_ts,X2_ts])
28 predictions_lbls = tc_lbl_encodr.inverse_transform([np.argmax(prediction,axis=1)])[0]
29 print(predictions_lbls)
~/.local/lib/python3.6/site-packages/keras/engine/training.py in predict(self, x, batch_size, verbose, steps)
1147 'argument.')
1148 # Validate user data.
-> 1149 x, _, _ = self._standardize_user_data(x)
1150 if self.stateful:
1151 if x[0].shape[0] > batch_size and x[0].shape[0] % batch_size != 0:
~/.local/lib/python3.6/site-packages/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, check_array_lengths, batch_size)
749 feed_input_shapes,
750 check_batch_axis=False, # Don't enforce the batch size.
--> 751 exception_prefix='input')
752
753 if y is not None:
~/.local/lib/python3.6/site-packages/keras/engine/training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
136 ': expected ' + names[i] + ' to have shape ' +
137 str(shape) + ' but got array with shape ' +
--> 138 str(data_shape))
139 return data
140
ValueError: Error when checking input: expected input_6 to have shape (2,) but got array with shape (1,)