Проблемы с моделью Seq2Seq с использованием слоя RNN и ячеек GRU в Keras - PullRequest
0 голосов
/ 10 октября 2018

По сути, я пытаюсь реализовать модель Seq2Seq, используя слой RNN и ячейки GRU. Во время обучения всё, похоже, работает нормально (loss и val_loss уменьшаются).

Это моя реализация:

Кодер

class Encoder(object):
    """Encoder half of the Seq2Seq model: an embedding plus stacked GRU cells.

    The layers are created once in ``__init__``; calling the instance on a
    tensor connects them and returns the RNN output together with the final
    state of each stacked cell.
    """

    def __init__(self, num_words, embedding_size, state_size, layers=2, dropout_rate=0.1):
        """Create the encoder layers.

        Args:
            num_words: Passed as ``input_dim`` of the embedding layer.
            embedding_size: Passed as ``output_dim`` of the embedding layer.
            state_size: Number of units of every GRU cell.
            layers: Number of GRU cells stacked inside the RNN layer.
            dropout_rate: Used as both ``dropout`` and ``recurrent_dropout``
                of every GRU cell.
        """
        self.encoder_embedding = Embedding(input_dim=num_words,
                                           output_dim=embedding_size,
                                           name='encoder_embedding')

        cells = [
            GRUCell(state_size, dropout=dropout_rate, recurrent_dropout=dropout_rate)
            for _ in range(layers)
        ]
        # return_state=True: the layer returns its output followed by the
        # states, which __call__ splits apart below.
        self.rnn_layer = RNN(cells, return_state=True)

    def __call__(self, x):
        """Connect the encoder layers to ``x``.

        Args:
            x: Input tensor fed to the embedding layer.

        Returns:
            A pair ``(encoder_output, encoder_state)`` where
            ``encoder_output`` is the first element returned by the RNN layer
            and ``encoder_state`` holds all remaining elements (the states).
        """
        embedded = self.encoder_embedding(x)
        rnn_result = self.rnn_layer(embedded)

        # First element is the output, everything after it is state.
        encoder_output, encoder_state = rnn_result[0], rnn_result[1:]
        return encoder_output, encoder_state

Декодер

class Decoder(object):
    """Decoder half of the Seq2Seq model.

    Consists of its own embedding, a stack of GRU cells that returns the full
    output sequence, and a softmax ``Dense`` layer applied at every time step.
    """

    def __init__(self, num_words, embedding_size, state_size, layers=2, dropout_rate=0.1):
        """Create the decoder layers.

        Args:
            num_words: Passed as ``input_dim`` of the embedding and as the
                number of units of the softmax output layer.
            embedding_size: Passed as ``output_dim`` of the embedding layer.
            state_size: Number of units of every GRU cell.
            layers: Number of GRU cells stacked inside the RNN layer.
            dropout_rate: Used as both ``dropout`` and ``recurrent_dropout``
                of every GRU cell.
        """
        # Different embedding due to different languages
        self.embedding = Embedding(input_dim=num_words,
                                   output_dim=embedding_size,
                                   name='decoder_embedding')

        cells = [
            GRUCell(state_size, dropout=dropout_rate, recurrent_dropout=dropout_rate)
            for _ in range(layers)
        ]
        # NOTE(review): return_state is not enabled here, so this layer yields
        # only the output sequence; step-by-step inference that needs the
        # updated states must obtain them some other way - confirm against
        # the code that builds the inference model.
        self.rnn_layer = RNN(cells, return_sequences=True)
        self.output = Dense(num_words,
                            activation='softmax',
                            name='decoder_output')

    def __call__(self, x, initial_state):
        """Connect the decoder layers to ``x``.

        Args:
            x: Input tensor fed to the decoder embedding.
            initial_state: Passed to the RNN layer as ``initial_state``.

        Returns:
            The output of the softmax layer applied to every time step of the
            RNN output sequence.
        """
        embedded = self.embedding(x)
        sequence = self.rnn_layer(embedded, initial_state=initial_state)

        # Apply the dense softmax layer independently at each time step.
        decoder_output = TimeDistributed(self.output)(sequence)
        return decoder_output

Однако, когда я пытаюсь предсказать выборку:

class Translator(object):
    """Greedy, token-by-token translation using separate encoder/decoder models.

    The tokenizer objects are expected to expose ``word_index``,
    ``max_tokens``, ``text_to_tokens`` and ``token_to_word`` as used in
    ``evaluate``. ``encoder`` must provide ``predict`` and ``decoder`` must
    provide ``predict_on_batch``.
    """

    def __init__(self, tokenizer_src, tokenizer_dest, encoder, decoder, start_word="", end_word=""):
        """Store the collaborators used by ``evaluate``.

        Args:
            tokenizer_src: Tokenizer for the source text.
            tokenizer_dest: Tokenizer for the destination text.
            encoder: Model whose ``predict`` yields the initial decoder state.
            decoder: Model whose ``predict_on_batch`` yields the token
                predictions followed by the updated state(s).
            start_word: Marker word that starts a destination sequence.
            end_word: Marker word that terminates a destination sequence.
        """
        self.tokenizer_src = tokenizer_src
        self.tokenizer_dest = tokenizer_dest
        self.encoder = encoder
        self.decoder = decoder
        self.start_word = start_word
        self.end_word = end_word

    def evaluate(self, input_text, delimiter=" "):
        """Translate ``input_text`` and return the decoded words as one string.

        Decoding feeds the previously predicted token and the previously
        returned state back into the decoder until the end token is predicted
        or ``tokenizer_dest.max_tokens`` words have been produced.

        Args:
            input_text: Source text to translate.
            delimiter: String placed between the decoded words.

        Returns:
            The decoded words joined by ``delimiter``; the end marker is not
            included.

        Raises:
            KeyError: If the stripped start or end word is missing from
                ``tokenizer_dest.word_index``.
        """
        token_start = self.tokenizer_dest.word_index[self.start_word.strip()]
        token_end = self.tokenizer_dest.word_index[self.end_word.strip()]

        # Max number of tokens / words in the output sequence.
        max_tokens = self.tokenizer_dest.max_tokens

        # Convert the input-text to integer-tokens.
        # Note the sequence of tokens has to be reversed.
        # Padding is probably not necessary.
        input_tokens = self.tokenizer_src.text_to_tokens(text=input_text,
                                                         reverse=True,
                                                         padding=True)

        # The encoder state seeds the decoder exactly once. It must be set
        # BEFORE the loop: resetting it inside the loop would throw away the
        # state returned by every decoding step.
        state_value = self.encoder.predict(input_tokens)

        token_int = token_start
        decoded_words = []

        while token_int != token_end and len(decoded_words) < max_tokens:
            # A batch holding a single sequence of a single token.
            decoder_input_data = np.zeros((1, 1))
            decoder_input_data[0, 0] = token_int

            # Wrap the input-data in a dict so the data is matched to the
            # model inputs by name rather than by position.
            # NOTE(review): the keys must match the input names of the
            # decoder model, and that model must not depend on the encoder's
            # own input placeholder - confirm where the model is built.
            x_data = {
                'input_1': state_value,
                'decoder_input': decoder_input_data
            }

            predictions = self.decoder.predict_on_batch(x_data)
            # First element: token predictions; the rest: updated state,
            # carried over to the next iteration.
            output_tokens, state_value = predictions[0], predictions[1:]

            # Most probable token at the last time step.
            token_int = np.argmax(output_tokens[0, -1, :])
            if token_int == token_end:
                # Do not emit the end marker itself.
                break

            decoded_words.append(self.tokenizer_dest.token_to_word(token_int))

        # Only real words were collected, so nothing is sliced off here; this
        # keeps the final word when decoding stops because of max_tokens.
        return delimiter.join(decoded_words)

Keras выдаёт ошибку, сообщая, что необходимо передать значение для моего encoder_input:

Traceback (most recent call last):
  File "inference.py", line 31, in <module>
    result = translator.evaluate(input)
  File "C:\A_PERSONAL\Github\Machine Translation\Classes\Translator.py", line 136, in evaluate
    output_tokens_and_state = self.decoder.predict_on_batch([decoder_input_data, state_value])
  File "C:\Users\quang\Anaconda3\envs\car\lib\site-packages\keras\engine\training.py", line 1274, in predict_on_batch
    outputs = self.predict_function(ins)
  File "C:\Users\quang\Anaconda3\envs\car\lib\site-packages\keras\backend\tensorflow_backend.py", line 2715, in __call__
    return self._call(inputs)
  File "C:\Users\quang\Anaconda3\envs\car\lib\site-packages\keras\backend\tensorflow_backend.py", line 2675, in _call
    fetched = self._callable_fn(*array_vals)
  File "C:\Users\quang\Anaconda3\envs\car\lib\site-packages\tensorflow\python\client\session.py", line 1454, in __call__
    self._session._session, self._handle, args, status, None)
  File "C:\Users\quang\Anaconda3\envs\car\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 519, in __exit__
    c_api.TF_GetCode(self.status.status))

tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'encoder_input' with dtype float and shape [?,?]
  [[Node: encoder_input = Placeholder[dtype=DT_FLOAT, shape=[?,?], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
  [[Node: time_distributed_2/Reshape_1/_41 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_591_time_distributed_2/Reshape_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

...