ValueError: Input 0 of layer dense is incompatible with the layer, when using a Keras seq2seq model?

I understand why I am getting this error, but I don't know how to fix it. The error happens because my decoder_dense layer expects a shape of 9 words, while at inference time I feed it a target word of length 1. I don't see how to get around this, because I can't use the first (training) model for prediction when I don't have a target sentence. If needed, here is the Keras blog post on seq2seq that I followed (https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html).
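To make the mismatch concrete, here is a stripped-down sketch (not my real model; the 256 and 9 just mirror latent_dim and max_decoder_seq_length from the full code below) that raises the same error:

from keras.layers import Input, Dense, Flatten

latent_dim = 256
max_decoder_seq_length = 9   # the decoder length the Dense layer gets built against

dense = Dense(max_decoder_seq_length, activation='softmax')
flatten = Flatten()

# Training time: 9 decoder timesteps of 256 features, flattened to 2304
train_seq = Input(shape=(max_decoder_seq_length, latent_dim))
dense(flatten(train_seq))    # Dense kernel is now fixed at 2304 inputs

# Inference time: a single timestep, flattened to only 256
infer_seq = Input(shape=(1, latent_dim))
dense(flatten(infer_seq))    # raises the same "expected ... value 2304" ValueError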

My code, adapted from the Keras example:

from __future__ import print_function

from keras import Model
from keras.layers import Input, LSTM, Dense, Embedding, Flatten
import numpy as np
import tensorflow as tf
from keras.callbacks import ModelCheckpoint


data_limiter = 5
sentences = []
file = "movie_lines.txt"


#extract data from files        
with open(file, "r", encoding="utf-8", buffering=1, errors = "ignore") as f:
    lines = f.read().split("\n")

for line in lines[: min(data_limiter, len(lines) - 1)]:
    words = line.split(' ')
    del words[:8]
    
    sentences.append(words)

input_texts = sentences#[::2]
target_texts = sentences[-(len(sentences)-1):]


for i in range(len(input_texts)):
    input_texts[i].append("<EOS>")
    input_texts[i].insert(0, "<SOS>")


input_characters = set()
target_characters = set()


for i in input_texts:
    for char in i:
        if char not in input_characters:
            input_characters.add(char)

for i in target_texts:
    for char in i:
        if char not in target_characters:
            target_characters.add(char)
            

input_characters = sorted(list(input_characters))
target_characters = sorted(list(target_characters))
num_encoder_tokens = len(input_characters)
num_decoder_tokens = len(target_characters)
max_encoder_seq_length = max([len(txt) for txt in input_texts])
max_decoder_seq_length = max([len(txt) for txt in target_texts])

print('Number of samples:', len(input_texts))
print('Number of unique input tokens:', num_encoder_tokens)
print('Number of unique output tokens:', num_decoder_tokens)
print('Max sequence length for inputs:', max_encoder_seq_length)
print('Max sequence length for outputs:', max_decoder_seq_length)

input_token_index = dict(
    [(char, i/num_encoder_tokens) for i, char in enumerate(input_characters)])
target_token_index = dict(
    [(char, i/num_decoder_tokens) for i, char in enumerate(target_characters)])



encoder_input_data = np.zeros(
    (len(input_texts), max_encoder_seq_length),
    dtype='float32')
decoder_input_data = np.zeros(
    (len(input_texts), max_decoder_seq_length),
    dtype='float32')
decoder_target_data = np.zeros(
    (len(input_texts), max_decoder_seq_length),
    dtype='float32')



for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
    for t, char in enumerate(input_text):
        encoder_input_data[i, t] = input_token_index[char]
    
    for t, char in enumerate(target_text):


        decoder_input_data[i, t] = target_token_index[char]
        if t > 0:

            decoder_target_data[i, t - 1] = target_token_index[char]


batch_size = 1  # Batch size for training.
epochs = 100  # Number of epochs to train for.
latent_dim = 256
embedding_size = 50

encoder_inputs = Input(shape=(max_encoder_seq_length,), name = "encoder_input")
embedding_encoder = Embedding(1, embedding_size, input_length= max_encoder_seq_length, name = "embedding_layer_encoder")

words_embedded = embedding_encoder(encoder_inputs)


encoder = LSTM(latent_dim, return_state=True, name = "encoder_LSTM")

encoder_outputs, state_h, state_c = encoder(words_embedded)

encoder_states = [state_h, state_c]

decoder_inputs = Input(shape=(max_decoder_seq_length,), name = "decoder_input")

embedding_decoder = Embedding(1, embedding_size, name = "embedding_layer_decoder")

words_embedded2 = embedding_decoder(decoder_inputs)

decoder_LSTM = LSTM(latent_dim, return_sequences=True, return_state=True, name = "decoder_LSTM")

decoder_outputs, _, _ = decoder_LSTM(words_embedded2,
                                     initial_state=encoder_states)
flatten = Flatten( name = "flatten")
one_d = flatten(decoder_outputs)
print(decoder_outputs)

decoder_dense = Dense(max_decoder_seq_length, activation='softmax')

decoder_outputs = decoder_dense(one_d)


model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# model.summary()
# Run training
model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
              metrics=['accuracy'])

model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          epochs=1,
          validation_split=0.2)

"""
Encode the input and get the initial decoder state
Run one step of the decoder with that initial state and a "<SOS>" token as the target. The output will be the next word.
Append the target character predicted and repeat.
"""
encoder_model = Model(encoder_inputs, encoder_states)

decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_inputs = Input(shape=(1,), name = "decoder_input")

embeddings = embedding_encoder(decoder_inputs)

decoder_outputs, state_h, state_c = decoder_LSTM(
    embeddings, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
one_d = flatten(decoder_outputs)

decoder_outputs = decoder_dense(one_d)

decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs] + decoder_states)




def decode_sequence(input_seq):

    states_value = encoder_model.predict(input_seq)

    target_seq = np.zeros((1, 1))
    
    target_seq[0, 0] = target_token_index["<SOS>"]

    
    words_target = embedding_decoder(target_seq)
    print(words_target.shape)

 

    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict_on_batch([target_seq] + states_value)
        # Sample a token
        print(output_tokens)
    #     sampled_token_index = np.argmax(output_tokens[0, -1, :])
    #     sampled_char = reverse_target_char_index[sampled_token_index]
    #     decoded_sentence += sampled_char

    #     # Exit condition: either hit max length
    #     # or find stop character.
    #     if (sampled_char == '\n' or
    #        len(decoded_sentence) > max_decoder_seq_length):
    #         stop_condition = True

    #     # Update the target sequence (of length 1).
    #     target_seq = np.zeros((1, 1, num_decoder_tokens))
    #     target_seq[0, 0, sampled_token_index] = 1.

    #     # Update states
    #     states_value = [h, c]

    # return decoded_sentence
    
decode_sequence(encoder_input_data[0])

The data can be found here: https://www.cs.cornell.edu/~cristian/Cornell_Movie-Dialogs_Corpus.html

The error:

    decoder_outputs = decoder_dense(one_d)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer.py", line 886, in __call__
    self.name)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow/python/keras/engine/input_spec.py", line 216, in assert_input_compatibility
    ' but received input with shape ' + str(shape))
ValueError: Input 0 of layer dense is incompatible with the layer: expected axis -1 of input shape to have value 2304 but received input with shape [None, 256]
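As far as I can tell, the 2304 in the message is just the flattened training-time decoder output, max_decoder_seq_length * latent_dim, while the one-step inference decoder flattens to only 256:

latent_dim = 256
max_decoder_seq_length = 9                   # inferred from 2304 / 256 in the error
print(max_decoder_seq_length * latent_dim)   # 2304 -> what decoder_dense was built for
print(1 * latent_dim)                        # 256  -> what it receives in decode_sequence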

...