По сути, я пытаюсь реализовать модель Seq2Seq, используя слой RNN и ячейки GRU. Во время обучения всё, похоже, работает нормально (и loss, и val_loss уменьшаются).
Это моя реализация:
Кодер
class Encoder(object):
    """Encoder half of the Seq2Seq model: an embedding followed by stacked GRU cells.

    The object is not a Keras layer itself; it owns the layers and wires
    them together when it is called on an input tensor.
    """

    def __init__(self, num_words, embedding_size, state_size, layers=2, dropout_rate=0.1):
        """Create the embedding layer and the stacked-GRU RNN layer.

        Args:
            num_words: Passed to the embedding as ``input_dim``.
            embedding_size: Passed to the embedding as ``output_dim``.
            state_size: Unit count handed to every ``GRUCell``.
            layers: How many ``GRUCell`` instances are stacked.
            dropout_rate: Used for both ``dropout`` and ``recurrent_dropout``
                of every cell.
        """
        self.encoder_embedding = Embedding(
            input_dim=num_words,
            output_dim=embedding_size,
            name='encoder_embedding',
        )

        gru_stack = []
        for _ in range(layers):
            gru_stack.append(
                GRUCell(state_size, dropout=dropout_rate, recurrent_dropout=dropout_rate)
            )

        # return_state=True makes the layer hand back its states together
        # with its output; __call__ splits the two apart.
        self.rnn_layer = RNN(gru_stack, return_state=True)

    def __call__(self, x):
        """Connect the encoder to the input ``x``.

        Returns:
            A pair ``(encoder_output, encoder_state)``: the first element of
            the RNN layer's result and everything after it, respectively.
        """
        embedded = self.encoder_embedding(x)
        rnn_result = self.rnn_layer(embedded)
        return rnn_result[0], rnn_result[1:]
Декодер
class Decoder(object):
    """Decoder half of the Seq2Seq model: embedding, stacked GRU cells, softmax.

    The object is not a Keras layer itself; it owns the layers and wires
    them together when it is called on an input tensor and an initial state.
    """

    def __init__(self, num_words, embedding_size, state_size, layers=2, dropout_rate=0.1):
        """Create the embedding, the stacked-GRU RNN layer and the output layer.

        Args:
            num_words: Used as the embedding ``input_dim`` and as the number
                of units of the final softmax layer.
            embedding_size: Passed to the embedding as ``output_dim``.
            state_size: Unit count handed to every ``GRUCell``.
            layers: How many ``GRUCell`` instances are stacked.
            dropout_rate: Used for both ``dropout`` and ``recurrent_dropout``
                of every cell.
        """
        # The decoder gets its own embedding because the destination
        # language differs from the source language.
        self.embedding = Embedding(
            input_dim=num_words,
            output_dim=embedding_size,
            name='decoder_embedding',
        )

        gru_stack = []
        for _ in range(layers):
            gru_stack.append(
                GRUCell(state_size, dropout=dropout_rate, recurrent_dropout=dropout_rate)
            )

        # return_sequences=True: the layer is asked for an output at every
        # time step. NOTE(review): return_state is not set here, so this
        # layer is not configured to hand its states back.
        self.rnn_layer = RNN(gru_stack, return_sequences=True)

        self.output = Dense(
            num_words,
            activation='softmax',
            name='decoder_output',
        )

    def __call__(self, x, initial_state):
        """Connect the decoder to input ``x``, starting from ``initial_state``.

        Returns:
            The result of applying the softmax layer, wrapped in
            ``TimeDistributed``, to the RNN layer's output.
        """
        embedded = self.embedding(x)
        hidden_seq = self.rnn_layer(embedded, initial_state=initial_state)
        # A new TimeDistributed wrapper is created on each call around the
        # same Dense instance.
        return TimeDistributed(self.output)(hidden_seq)
Однако когда я пытаюсь получить предсказание для одного примера:
class Translator(object):
    """Greedy, token-by-token translation using an encoder and a decoder model.

    The tokenizers, encoder and decoder are supplied by the caller; this
    class only drives the decoding loop.
    """

    def __init__(self, tokenizer_src, tokenizer_dest, encoder, decoder, start_word="", end_word=""):
        """Store the collaborators used by :meth:`evaluate`.

        Args:
            tokenizer_src: Object providing ``text_to_tokens`` for the source text.
            tokenizer_dest: Object providing ``word_index``, ``max_tokens`` and
                ``token_to_word`` for the destination language.
            encoder: Object providing ``predict``; its result is used as the
                decoder's first state.
            decoder: Object providing ``predict_on_batch``; its result is
                indexed as ``[output_tokens, *states]``.
            start_word: Marker word that starts every decoded sequence
                (stripped before lookup in ``word_index``).
            end_word: Marker word that terminates decoding (stripped before
                lookup in ``word_index``).
        """
        self.tokenizer_src = tokenizer_src
        self.tokenizer_dest = tokenizer_dest
        self.encoder = encoder
        self.decoder = decoder
        self.start_word = start_word
        self.end_word = end_word

    def evaluate(self, input_text, delimiter=" "):
        """Translate ``input_text`` and return the decoded words joined by ``delimiter``.

        Decoding stops when the decoder emits the end token or when
        ``tokenizer_dest.max_tokens`` words have been produced. The end
        marker itself is never part of the returned text.

        Raises:
            KeyError: If the stripped start or end word is missing from
                ``tokenizer_dest.word_index``.
        """
        token_start = self.tokenizer_dest.word_index[self.start_word.strip()]
        token_end = self.tokenizer_dest.word_index[self.end_word.strip()]

        # Max number of tokens / words in the output sequence.
        max_tokens = self.tokenizer_dest.max_tokens

        # Convert the input text to integer tokens; the tokenizer is asked
        # to reverse and pad the sequence.
        input_tokens = self.tokenizer_src.text_to_tokens(text=input_text,
                                                         reverse=True,
                                                         padding=True)

        # The encoder result seeds the decoder state. It must be assigned
        # once, BEFORE the loop: re-assigning it inside the loop would feed
        # the encoder state at every step and discard the decoder's own
        # updated state.
        state_value = self.encoder.predict(input_tokens)

        token_int = token_start
        decoded_words = []

        while token_int != token_end and len(decoded_words) < max_tokens:
            # One-step decoder input holding the previously sampled token.
            decoder_input_data = np.zeros((1, 1))
            decoder_input_data[0, 0] = token_int

            # Wrap the input data in a dict keyed by input name so the
            # values cannot be fed in the wrong order.
            x_data = {
                'input_1': state_value,
                'decoder_input': decoder_input_data
            }

            output_tokens_and_state = self.decoder.predict_on_batch(x_data)
            output_tokens = output_tokens_and_state[0]
            # Carry the decoder's new state into the next iteration.
            state_value = output_tokens_and_state[1:]

            # Greedy choice: most probable token at the last time step.
            token_int = np.argmax(output_tokens[0, -1, :])

            # Only real words are collected. Appending the end marker and
            # slicing it off afterwards would also drop a genuine word
            # whenever the loop stops because max_tokens was reached.
            if token_int != token_end:
                decoded_words.append(self.tokenizer_dest.token_to_word(token_int))

        return delimiter.join(decoded_words)
Keras выдаёт ошибку, сообщая, что необходимо передать значение для входного плейсхолдера encoder_input:
Traceback (most recent call last):
  File "inference.py", line 31, in <module>
    result = translator.evaluate(input)
  File "C:\A_PERSONAL\Github\Machine Translation\Classes\Translator.py", line 136, in evaluate
    output_tokens_and_state = self.decoder.predict_on_batch([decoder_input_data, state_value])
  File "C:\Users\quang\Anaconda3\envs\car\lib\site-packages\keras\engine\training.py", line 1274, in predict_on_batch
    outputs = self.predict_function(ins)
  File "C:\Users\quang\Anaconda3\envs\car\lib\site-packages\keras\backend\tensorflow_backend.py", line 2715, in __call__
    return self._call(inputs)
  File "C:\Users\quang\Anaconda3\envs\car\lib\site-packages\keras\backend\tensorflow_backend.py", line 2675, in _call
    fetched = self._callable_fn(*array_vals)
  File "C:\Users\quang\Anaconda3\envs\car\lib\site-packages\tensorflow\python\client\session.py", line 1454, in __call__
    self._session._session, self._handle, args, status, None)
  File "C:\Users\quang\Anaconda3\envs\car\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 519, in __exit__
    c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'encoder_input' with dtype float and shape [?,?]
  [[Node: encoder_input = Placeholder[dtype=DT_FLOAT, shape=[?,?], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
  [[Node: time_distributed_2/Reshape_1/_41 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_591_time_distributed_2/Reshape_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]