Это модель, которую я построил. Пожалуйста, помогите мне понять, есть ли проблема с моей моделью или любая другая проблема, с которой я столкнулся. Ошибка возникает после этого:
Train on 63828 samples, validate on 95743 samples
Epoch 1/1
63744/63828 [============================>.] - ETA: 2s - loss: 0.3427 - acc: 0.9943
Ошибка возникает в конце. Поэтому я удалил авлидацию, установленную во время тренировки.
from tensorflow.python.keras.layers import Embedding, Input
from tensorflow.python.keras.layers import LSTM, Bidirectional, GlobalMaxPool1D, Dropout
embedding_layer = Embedding(num_of_words, EMBEDDING_DIM, weights=[embedding_matrix], input_length=MAX_SEQUENCE_LENGTH, trainable=False)
#building the model
#INPUT LAYER
input_layer = Input((MAX_SEQUENCE_LENGTH,))
#EMBEDDING LAYER
embedding_layer = embedding_layer(input_layer)
#BI-LSTM LAYER
lstm_layer_output = Bidirectional(LSTM(128, return_sequences=True))(embedding_layer)
lstm, forward_h, forward_c, backward_h, backward_c = Bidirectional \
(LSTM
(128,
dropout=0.2,
return_sequences=True,
return_state=True,
recurrent_activation='relu',
recurrent_initializer='glorot_uniform'))(embedding_layer)
from tensorflow.python.keras import backend as K
#CNN LAYER WITH KERNELS 3,4,5
from tensorflow.python.keras.layers import Conv1D, MaxPooling1D
first_conv_layer = Conv1D(128, 3, activation='relu')(lstm_layer_output)
first_max_pooling_layer = MaxPooling1D(3)(first_conv_layer)
second_conv_layer = Conv1D(128, 4, activation='relu')(first_max_pooling_layer)
second_max_pooling_layer = MaxPooling1D(4)(second_conv_layer)
third_conv_layer = Conv1D(128, 5, activation='relu')(second_max_pooling_layer)
#third_max_pooling_layer = MaxPooling1D(5)(third_conv_layer)
global_max_pooling = GlobalMaxPool1D()(third_conv_layer)
#from tensorflow.python.keras.layers import Concatenate
#merged_pooling_layers = Concatenate(axis=1)([first_max_pooling_layer,second_max_pooling_layer,third_max_pooling_layer])
#global_max_pooling = GlobalMaxPool1D()(merged_pooling_layers)
#implementing attentionlayer manually
from tensorflow.python.keras.layers import Add
rnn_output = Add()([forward_h,backward_h])
hidden_size = int(lstm.shape[2])
from tensorflow.python.keras.layers import Lambda
hsf = Lambda(lambda x: x[:, -1], output_shape=(hidden_size,), name='last_hidden_state_forward')(rnn_output)
from tensorflow.python.keras.layers import Multiply
from tensorflow.python.keras.layers import Lambda
def norm(m):
return K.transpose(m)
u_t = Multiply()([Lambda(norm)(rnn_output),hsf])
context_vector = Multiply()([u_t,global_max_pooling])
def ex(m):
return K.exp(context_vector)
exp_u_t = Lambda(ex)(context_vector)
from tensorflow.python.keras.layers import Dense
attention_vector = Dense(128,activation='softmax')(exp_u_t)
x = Dense(64,activation="softmax")(weighted_input)
output_layer = Dense(6,activation="softmax")(x)
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.optimizers import Adam
model = Model(input_layer,output_layer)
from tensorflow.python.keras import optimizers
model.compile(
loss='categorical_crossentropy',
optimizer='sgd',
metrics=['accuracy']
)
print('Training model...')
r = model.fit(
data,
target_values,
batch_size=128,
epochs=1,
validation_split=0.0
)
Я получил ошибку:
InvalidArgumentError (see above for traceback): Incompatible shapes: [84,6] vs. [128,6]
[[Node: training/SGD/gradients/loss/dense_3_loss/mul_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@training/SGD/gradients/loss/dense_3_loss/mul_grad/Reshape_1"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](training/SGD/gradients/loss/dense_3_loss/mul_grad/Shape, training/SGD/gradients/loss/dense_3_loss/mul_grad/Shape_1)]]
Пожалуйста, помогите мне решить эту проблему. Спасибо
Редактировать : Это трассировка ошибки
Epoch 1/1
31872/31914 [============================>.] - ETA: 1s - loss: 0.2419 Traceback (most recent call last):
File "<ipython-input-1-a7cc2e59a772>", line 165, in <module>
validation_split=0.8
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\keras\_impl\keras\engine\training.py", line 1216, in fit
validation_steps=validation_steps)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\keras\_impl\keras\engine\training_arrays.py", line 245, in fit_loop
outs = f(ins_batch)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\keras\_impl\keras\backend.py", line 2824, in __call__
fetches=fetches, feed_dict=feed_dict, **self.session_kwargs)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 900, in run
run_metadata_ptr)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1135, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1316, in _do_run
run_metadata)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1335, in _do_call
raise type(e)(node_def, op, message)
InvalidArgumentError: Incompatible shapes: [128,6] vs. [42,6]
[[Node: training/SGD/gradients/loss/dense_3_loss/logistic_loss/mul_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@training/SGD/gradients/loss/dense_3_loss/logistic_loss/mul_grad/Reshape"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](training/SGD/gradients/loss/dense_3_loss/logistic_loss/mul_grad/Shape, training/SGD/gradients/loss/dense_3_loss/logistic_loss/mul_grad/Shape_1)]]
Caused by op 'training/SGD/gradients/loss/dense_3_loss/logistic_loss/mul_grad/BroadcastGradientArgs', defined at:
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\spyder\utils\ipython\start_kernel.py", line 268, in <module>
main()
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\spyder\utils\ipython\start_kernel.py", line 264, in main
kernel.start()
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 478, in start
self.io_loop.start()
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tornado\ioloop.py", line 888, in start
handler_func(fd_obj, events)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2728, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2850, in run_ast_nodes
if self.run_code(code, result):
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2910, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-1-a7cc2e59a772>", line 165, in <module>
validation_split=0.8
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\keras\_impl\keras\engine\training.py", line 1216, in fit
validation_steps=validation_steps)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\keras\_impl\keras\engine\training_arrays.py", line 90, in fit_loop
model._make_train_function()
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\keras\_impl\keras\engine\training.py", line 572, in _make_train_function
params=self._collected_trainable_weights, loss=self.total_loss)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\keras\_impl\keras\optimizers.py", line 208, in get_updates
grads = self.get_gradients(loss, params)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\keras\_impl\keras\optimizers.py", line 114, in get_gradients
grads = K.gradients(loss, params)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\keras\_impl\keras\backend.py", line 2866, in gradients
loss, variables, colocate_gradients_with_ops=True)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 494, in gradients
gate_gradients, aggregation_method, stop_gradients)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 636, in _GradientsHelper
lambda: grad_fn(op, *out_grads))
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 385, in _MaybeCompile
return grad_fn() # Exit early
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\ops\gradients_impl.py", line 636, in <lambda>
lambda: grad_fn(op, *out_grads))
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\ops\math_grad.py", line 874, in _MulGrad
rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 673, in broadcast_gradient_args
"BroadcastGradientArgs", s0=s0, s1=s1, name=name)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3392, in create_op
op_def=op_def)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1718, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
...which was originally created as op 'loss/dense_3_loss/logistic_loss/mul', defined at:
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\spyder\utils\ipython\start_kernel.py", line 268, in <module>
main()
[elided 16 identical lines from previous traceback]
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2910, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-1-a7cc2e59a772>", line 153, in <module>
optimizer='sgd',
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\keras\_impl\keras\engine\training.py", line 428, in compile
output_loss = weighted_loss(y_true, y_pred, sample_weight, mask)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\keras\_impl\keras\engine\training_utils.py", line 438, in weighted
score_array = fn(y_true, y_pred)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\keras\_impl\keras\losses.py", line 116, in binary_crossentropy
return K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\keras\_impl\keras\backend.py", line 3448, in binary_crossentropy
return nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\ops\nn_impl.py", line 181, in sigmoid_cross_entropy_with_logits
relu_logits - logits * labels,
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\ops\math_ops.py", line 979, in binary_op_wrapper
return func(x, y, name=name)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1211, in _mul_dispatch
return gen_math_ops.mul(x, y, name=name)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 4758, in mul
"Mul", x=x, y=y, name=name)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3392, in create_op
op_def=op_def)
File "C:\Users\JCMat\New\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1718, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Incompatible shapes: [128,6] vs. [42,6]
[[Node: training/SGD/gradients/loss/dense_3_loss/logistic_loss/mul_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@training/SGD/gradients/loss/dense_3_loss/logistic_loss/mul_grad/Reshape"], _device="/job:localhost/replica:0/task:0/device:CPU:0"](training/SGD/gradients/loss/dense_3_loss/logistic_loss/mul_grad/Shape, training/SGD/gradients/loss/dense_3_loss/logistic_loss/mul_grad/Shape_1)]]