I understand why I am getting this error, but I don't know how to fix it. The error happens because my decoder_dense layer expects a sequence of 9 words, while at inference time I feed it a single target word of length 1. I don't see how to get around this, because I can't use the training model to predict when I don't have a target sentence yet. For reference, here is the Keras blog post on seq2seq: https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html
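As far as I can tell, the numbers work out like this: Flatten turns the decoder output of shape (max_decoder_seq_length, latent_dim) = (9, 256) into 2304 features, so the Dense kernel gets built for 2304 inputs, but the one-step inference decoder only hands it 1 * 256 = 256. A stripped-down sketch of just that part (9 and 256 are simply the values from my code below):

from keras.layers import Input, Dense, Flatten

latent_dim = 256              # decoder LSTM size, as in my code
max_decoder_seq_length = 9    # my longest target sentence

# Training-time wiring: Flatten turns (9, 256) into 2304 features, so the
# Dense kernel is built for an input of size 9 * 256 = 2304.
seq_in = Input(shape=(max_decoder_seq_length, latent_dim))
dense = Dense(max_decoder_seq_length, activation='softmax')
train_out = dense(Flatten()(seq_in))

# Inference-time wiring: the decoder emits a single timestep, so Flatten only
# yields 1 * 256 = 256 features, and reusing the same Dense layer fails with
# "expected axis -1 of input shape to have value 2304".
step_in = Input(shape=(1, latent_dim))
step_out = dense(Flatten()(step_in))   # ValueError here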
My code, adapted from the Keras example:
from __future__ import print_function
from keras import Model
from keras.layers import Input, LSTM, Dense, Embedding, Flatten
import numpy as np
import tensorflow as tf
from keras.callbacks import ModelCheckpoint
data_limiter = 5
sentences = []
file = "movie_lines.txt"
#extract data from files
with open(file, "r", encoding="utf-8", buffering=1, errors="ignore") as f:
    lines = f.read().split("\n")
for line in lines[: min(data_limiter, len(lines) - 1)]:
    words = line.split(' ')
    del words[:8]
    sentences.append(words)
input_texts = sentences  # [::2]
target_texts = sentences[-(len(sentences) - 1):]
for i in range(len(input_texts)):
    input_texts[i].append("<EOS>")
    input_texts[i].insert(0, "<SOS>")
input_characters = set()
target_characters = set()
for i in input_texts:
    for char in i:
        if char not in input_characters:
            input_characters.add(char)
for i in target_texts:
    for char in i:
        if char not in target_characters:
            target_characters.add(char)
input_characters = sorted(list(input_characters))
target_characters = sorted(list(target_characters))
num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
max_encoder_seq_length = max([len(txt) for txt in input_texts])
max_decoder_seq_length = max([len(txt) for txt in target_texts])
print('Number of samples:', len(input_texts))
print('Number of unique input tokens:', num_encoder_tokens)
print('Number of unique output tokens:', num_decoder_tokens)
print('Max sequence length for inputs:', max_encoder_seq_length)
print('Max sequence length for outputs:', max_decoder_seq_length)
input_token_index = dict(
    [(char, i / num_encoder_tokens) for i, char in enumerate(input_characters)])
target_token_index = dict(
    [(char, i / num_decoder_tokens) for i, char in enumerate(target_characters)])
encoder_input_data = np.zeros(
    (len(input_texts), max_encoder_seq_length),
    dtype='float32')
decoder_input_data = np.zeros(
    (len(input_texts), max_decoder_seq_length),
    dtype='float32')
decoder_target_data = np.zeros(
    (len(input_texts), max_decoder_seq_length),
    dtype='float32')
for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t] = input_token_index[char]
    for t, char in enumerate(target_text):
        decoder_input_data[i, t] = target_token_index[char]
        if t > 0:
            decoder_target_data[i, t - 1] = target_token_index[char]
batch_size = 1 # Batch size for training.
epochs = 100 # Number of epochs to train for.
latent_dim = 256
embedding_size = 50
encoder_inputs = Input(shape=(max_encoder_seq_length,), name = "encoder_input")
embedding_encoder = Embedding(1, embedding_size, input_length= max_encoder_seq_length, name = "embedding_layer_encoder")
words_embedded = embedding_encoder(encoder_inputs)
encoder = LSTM(latent_dim, return_state=True, name = "encoder_LSTM")
encoder_outputs, state_h, state_c = encoder(words_embedded)
encoder_states = [state_h, state_c]
decoder_inputs = Input(shape=(max_decoder_seq_length,), name = "decoder_input")
embedding_decoder = Embedding(1, embedding_size, name = "embedding_layer_decoder")
words_embedded2 = embedding_decoder(decoder_inputs)
decoder_LSTM = LSTM(latent_dim, return_sequences=True, return_state=True, name = "decoder_LSTM")
decoder_outputs, _, _ = decoder_LSTM(words_embedded2,
                                     initial_state=encoder_states)
flatten = Flatten( name = "flatten")
one_d = flatten(decoder_outputs)
print(decoder_outputs)
decoder_dense = Dense(max_decoder_seq_length, activation='softmax')
decoder_outputs = decoder_dense(one_d)
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
# model.summary()
# Run training
model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          epochs=1,
          validation_split=0.2)
"""
Encode the input and get the initial decoder state
Run part of decoder with initial state and a "<SOS>" token as targe The output will be next word.
Append the target character predicted and repeat.
"""
encoder_model = Model(encoder_inputs, encoder_states)
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_inputs = Input(shape=(1,), name = "decoder_input")
embeddings = embedding_encoder(decoder_inputs)
decoder_outputs, state_h, state_c = decoder_LSTM(
    embeddings, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
one_d = flatten(decoder_outputs)
decoder_outputs = decoder_dense(one_d)
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs] + decoder_states)
def decode_sequence(input_seq):
    states_value = encoder_model.predict(input_seq)
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = target_token_index["<SOS>"]
    words_target = embedding_decoder(target_seq)
    print(words_target.shape)
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict_on_batch([target_seq] + states_value)
        # Sample a token
        print(output_tokens)
        # except Exception as e:
        #     pass
        #     break
        # sampled_token_index = np.argmax(output_tokens[0, -1, :])
        # sampled_char = reverse_target_char_index[sampled_token_index]
        # decoded_sentence += sampled_char
        # # Exit condition: either hit max length
        # # or find stop character.
        # if (sampled_char == '\n' or
        #         len(decoded_sentence) > max_decoder_seq_length):
        #     stop_condition = True
        # # Update the target sequence (of length 1).
        # target_seq = np.zeros((1, 1, num_decoder_tokens))
        # target_seq[0, 0, sampled_token_index] = 1.
        # # Update states
        # states_value = [h, c]
    # return decoded_sentence

decode_sequence(encoder_input_data[0])
The data can be found here: https://www.cs.cornell.edu/~cristian/Cornell_Movie-Dialogs_Corpus.html
The error:
decoder_outputs = decoder_dense(one_d)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 886, in __call__
    self.name)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/keras/engine/input_spec.py", line 216, in assert_input_compatibility
    ' but received input with shape ' + str(shape))
ValueError: Input 0 of layer dense is incompatible with the layer: expected axis -1 of input shape to have value 2304 but received input with shape [None, 256]
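If I read the traceback right, 2304 is exactly max_decoder_seq_length * latent_dim (9 * 256), i.e. the Flatten ties decoder_dense to 9 decoder timesteps, while the inference decoder only ever produces one. The blog example applies its Dense layer to the decoder output per timestep, with no Flatten, so the same layer accepts any sequence length. Is something along these lines the right direction? (A rough, self-contained sketch of what I mean; num_decoder_tokens = 40 is only a placeholder.)

from keras.layers import Input, LSTM, Dense

latent_dim = 256
num_decoder_tokens = 40   # placeholder vocabulary size, only for this sketch

decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')

# Without Flatten, Dense acts only on the last axis, so the same layer
# accepts a 9-step sequence at training time ...
train_in = Input(shape=(9, latent_dim))
train_out = decoder_dense(decoder_lstm(train_in)[0])   # (None, 9, num_decoder_tokens)

# ... and a 1-step sequence at inference time, with no shape clash.
step_in = Input(shape=(1, latent_dim))
step_out = decoder_dense(decoder_lstm(step_in)[0])     # (None, 1, num_decoder_tokens)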