Я пытаюсь запустить нейронный машинный перевод с механизмом внимания (см. ссылку на руководство и код ниже): https://www.tensorflow.org/tutorials/text/nmt_with_attention
import os
import re
import numpy as np
import tensorflow as tf
from collections import Counter
import warnings
import gzip
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")
# Enable memory growth on every visible GPU so TensorFlow does not reserve
# the whole device memory at start-up.
gpus = tf.config.experimental.list_physical_devices('GPU')
for physical_gpu in gpus:
    tf.config.experimental.set_memory_growth(physical_gpu, True)
def preprocess_sentence(sentence):
    """Wrap *sentence* in the ``<start>`` / ``<end>`` boundary markers.

    The markers are separated from the sentence by single spaces; the
    sentence text itself is left untouched.
    """
    return " ".join(("<start>", sentence, "<end>"))
def parse_data(file_name):
    """Read a UTF-8 text file and return its lines as a list of sentences.

    Args:
        file_name: Path of the file to read, one sentence per line.

    Returns:
        A list with one string per line, without line terminators. Blank
        lines inside the file are kept so that parallel files stay aligned
        line by line.
    """
    with open(file_name, 'r', encoding='utf8') as source:
        sentence_data = source.read().split("\n")
    # A file that ends with a newline (or is empty) produces one empty
    # trailing element that is not a real sentence; drop exactly that one.
    if sentence_data and sentence_data[-1] == "":
        sentence_data.pop()
    return sentence_data
def tokenizer(language):
    """Fit a Keras ``Tokenizer`` on *language* and encode it as padded ids.

    Args:
        language: The texts to build the vocabulary from and to encode.

    Returns:
        A ``(language_tokenizer, tensor)`` pair: the fitted tokenizer and the
        id sequences of *language*, zero-padded at the end of each sentence.
    """
    language_tokenizer = tf.keras.preprocessing.text.Tokenizer(
        num_words=None,
        filters="",
        split=" ",
    )
    # Count word frequencies to build the vocabulary.
    language_tokenizer.fit_on_texts(language)
    # Convert the texts to id sequences.
    id_sequences = language_tokenizer.texts_to_sequences(language)
    # Pad with zeros after the end of each sentence.
    tensor = tf.keras.preprocessing.sequence.pad_sequences(
        id_sequences, padding="post"
    )
    return language_tokenizer, tensor
def max_length(tensor):
    """Return the length of the longest row in *tensor*."""
    return max(map(len, tensor))
def make_dataset(input_tensor, output_tensor, batch_size, epochs, shuffle):
    """Build a batched ``tf.data`` pipeline over paired input/output rows.

    Args:
        input_tensor: Source-side rows.
        output_tensor: Target-side rows, aligned with *input_tensor*.
        batch_size: Number of pairs per batch; incomplete batches are dropped.
        epochs: How many times the data is repeated.
        shuffle: When truthy, shuffle with a buffer covering every pair.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    pairs = tf.data.Dataset.from_tensor_slices((input_tensor, output_tensor))
    if shuffle:
        pairs = pairs.shuffle(len(input_tensor))
    repeated = pairs.repeat(epochs)
    return repeated.batch(batch_size=batch_size, drop_remainder=True)
# Distribution strategy shared by the whole script. The layer and model
# classes that follow are plain definitions that create no variables, so they
# are declared at module level; only the places that construct objects or run
# steps enter strategy.scope().
strategy = tf.distribute.MirroredStrategy()
class Encoder(tf.keras.layers.Layer):
    """Source-side encoder: an embedding followed by a single GRU."""

    def __init__(self, vocab_size, embedding_units, encoding_units, batch_size):
        """Create the embedding and GRU sub-layers.

        Args:
            vocab_size: Size of the source vocabulary.
            embedding_units: Width of the embedding vectors.
            encoding_units: Number of GRU units.
            batch_size: Batch size used by ``initialize_hidden_state``.
        """
        super().__init__()
        self.batch_size = batch_size
        self.encoding_units = encoding_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_units)
        self.gru = tf.keras.layers.GRU(
            self.encoding_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform',
        )

    def call(self, x, hidden):
        """Embed *x* and run the GRU starting from *hidden*.

        Returns:
            ``(output, state)``: the per-position GRU outputs and the final
            GRU state.
        """
        embedded = self.embedding(x)
        output, state = self.gru(embedded, initial_state=hidden)
        return output, state

    def initialize_hidden_state(self):
        """Return an all-zero state of shape ``(batch_size, encoding_units)``."""
        state_shape = (self.batch_size, self.encoding_units)
        return tf.zeros(state_shape)
class Attention(tf.keras.layers.Layer):
    """Additive attention over the encoder outputs."""

    def __init__(self, units):
        """Create the two projection layers and the scalar scoring layer."""
        super().__init__()
        self.w1 = tf.keras.layers.Dense(units)
        self.w2 = tf.keras.layers.Dense(units)
        self.v = tf.keras.layers.Dense(1)

    def call(self, decoder_hidden, encoder_outputs):
        """Score every encoder position against the decoder state.

        Args:
            decoder_hidden: Decoder state, shape (batch_size, units).
            encoder_outputs: Encoder outputs, shape (batch_size, length, units).

        Returns:
            ``(context_vector, attention_weights)``: the attention-weighted
            sum of the encoder outputs and the weights themselves, with shape
            (batch_size, length, 1).
        """
        # Add a time axis so the state broadcasts over the encoder positions:
        # (batch_size, units) -> (batch_size, 1, units).
        hidden_with_time_axis = tf.keras.backend.expand_dims(decoder_hidden, 1)
        # Input of self.v: (batch_size, length, units);
        # output of self.v: (batch_size, length, 1).
        projected = self.w1(encoder_outputs) + self.w2(hidden_with_time_axis)
        score = self.v(tf.keras.activations.tanh(projected))
        # Normalise over the length axis.
        attention_weights = tf.keras.activations.softmax(score, axis=1)
        weighted_outputs = attention_weights * encoder_outputs
        context_vector = tf.reduce_sum(weighted_outputs, axis=1)
        # attention_weights are returned to make visualisation easy.
        return context_vector, attention_weights
class Decoder(tf.keras.layers.Layer):
    """Target-side decoder: attention, embedding, GRU and output projection."""

    def __init__(self, vocab_size, embedding_units, decoding_units, batch_size):
        """Create the decoder sub-layers.

        Args:
            vocab_size: Size of the target vocabulary (width of the logits).
            embedding_units: Width of the embedding vectors.
            decoding_units: Number of GRU units, also used by the attention.
            batch_size: Stored on the instance; not used by ``call``.
        """
        super().__init__()
        self.batch_size = batch_size
        self.decoding_units = decoding_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_units)
        self.gru = tf.keras.layers.GRU(
            self.decoding_units,
            return_sequences=True,
            return_state=True,
            recurrent_initializer='glorot_uniform',
        )
        self.fc = tf.keras.layers.Dense(vocab_size)
        self.attention = Attention(self.decoding_units)

    def call(self, x, hidden, encoding_outputs):
        """Run one decoding step.

        Args:
            x: Token ids for the current step.
            hidden: Previous decoder state; it must have ``decoding_units``
                features because it seeds the GRU.
            encoding_outputs: Encoder outputs attended over.

        Returns:
            ``(output, state, attention_weights)``: the vocabulary logits,
            the new GRU state and the attention weights.
        """
        context_vector, attention_weights = self.attention(hidden, encoding_outputs)
        x = self.embedding(x)
        combined_x = tf.concat(
            [tf.keras.backend.expand_dims(context_vector, 1), x], axis=-1
        )
        # Seed the GRU with the previous state. Without initial_state the GRU
        # starts from zeros on every call, so the state that callers carry
        # from step to step would only influence the attention scores.
        output, state = self.gru(combined_x, initial_state=hidden)
        # Collapse the time axis: (batch_size * steps, decoding_units).
        output = tf.reshape(output, (-1, output.shape[2]))
        # Project to logits: (batch_size * steps, vocab_size).
        output = self.fc(output)
        return output, state, attention_weights
class Seq2Seq(tf.keras.Model):
    """Encoder-decoder model that decodes with teacher forcing."""

    def __init__(self, input_vocab_size, output_vocab_size, embedding_units, units, batch_size):
        """Build an ``Encoder`` and a ``Decoder`` sharing *units* and *embedding_units*."""
        super().__init__()
        self.encoder = Encoder(input_vocab_size, embedding_units, units, batch_size)
        self.decoder = Decoder(output_vocab_size, embedding_units, units, batch_size)

    def call(self, inp, targ, encoding_hidden):
        """Encode *inp*, then decode one step per target position.

        Each step feeds the ground-truth token ``targ[:, t]`` to the decoder,
        for every position except the last.

        Returns:
            A Python list with one decoder output per step.
        """
        encoding_outputs, encoder_state = self.encoder(inp, encoding_hidden)
        decoder_state = encoder_state
        step_outputs = []
        # NOTE(review): this loop is a Python loop over a static length, so
        # inside tf.function it is presumably unrolled into the graph once per
        # target position -- worth confirming for very long targets.
        num_steps = targ.shape[1] - 1
        for step in range(num_steps):
            decoder_input = tf.expand_dims(targ[:, step], 1)
            step_output, decoder_state, _ = self.decoder(
                decoder_input, decoder_state, encoding_outputs
            )
            step_outputs.append(step_output)
        return step_outputs
# Hyper-parameters and vocabulary construction. A first pass over every shard
# fits both tokenizers and records the longest sentence on each side.
embedding_units = 256
units = 1024
max_input = -1
max_output = -1
input_tokenizer = tf.keras.preprocessing.text.Tokenizer(
    num_words=None, filters="", split=" "
)
output_tokenizer = tf.keras.preprocessing.text.Tokenizer(
    num_words=None, filters="", split=" "
)
for i in range(0, 33):
    en_file = "training-giga-fren/data/en_" + str(i) + ".txt"
    fr_file = "training-giga-fren/data/fr_" + str(i) + ".txt"
    en_data = parse_data(en_file)
    fr_data = parse_data(fr_file)
    input_tokenizer.fit_on_texts(en_data)
    output_tokenizer.fit_on_texts(fr_data)
    # Read the lengths from the unpadded id lists. Padding the whole shard
    # only to look at its width would allocate a dense
    # (sentences x longest sentence) array for each language.
    input_sequences = input_tokenizer.texts_to_sequences(en_data)
    output_sequences = output_tokenizer.texts_to_sequences(fr_data)
    tmp_max_input = max((len(seq) for seq in input_sequences), default=0)
    tmp_max_output = max((len(seq) for seq in output_sequences), default=0)
    max_input = max(max_input, tmp_max_input)
    max_output = max(max_output, tmp_max_output)
    print("Count {} max_input {} max_output {} input_vocab_size {} output_vocab_size {}"
          .format(i, max_input, max_output,
                  len(input_tokenizer.word_index) + 1,
                  len(output_tokenizer.word_index) + 1))

batch_size_per_replica = 2
# Use the strategy's replica count rather than len(gpus): on a machine with
# no visible GPU len(gpus) is 0, which would make the global batch size 0.
batch_size = batch_size_per_replica * strategy.num_replicas_in_sync
# +1 because word indices start at 1; id 0 is the padding value.
input_vocab_size = len(input_tokenizer.word_index) + 1
output_vocab_size = len(output_tokenizer.word_index) + 1
print("max_input {} max_output {}".format(max_input, max_output))
print("input_vocab_size {} output_vocab_size {}".format(input_vocab_size, output_vocab_size))

# Only the model construction has to happen under the strategy scope.
with strategy.scope():
    seq2seq = Seq2Seq(input_vocab_size, output_vocab_size, embedding_units, units, batch_size_per_replica)
def loss_function(real, pred):
    """Masked cross-entropy for one decoding step.

    Positions where *real* equals 0 contribute no loss. The per-example
    losses are reduced with ``tf.nn.compute_average_loss`` using the global
    ``batch_size``.

    Args:
        real: Target token ids for this step.
        pred: Predictions for this step, as passed to ``loss_object``.

    Returns:
        The reduced scalar loss.
    """
    per_example_loss = loss_object(real, pred)
    keep = tf.cast(tf.math.not_equal(real, 0), dtype=per_example_loss.dtype)
    masked_loss = per_example_loss * keep
    return tf.nn.compute_average_loss(masked_loss, global_batch_size=batch_size)
def train_step(inp, targ, encoding_hidden):
    """Run one optimisation step on a per-replica batch.

    Args:
        inp: Source token ids.
        targ: Target token ids; position ``t + 1`` is the label for step ``t``.
        encoding_hidden: Initial encoder state.

    Returns:
        The step loss averaged over the target time steps, for reporting.
    """
    loss = 0
    with tf.GradientTape() as tape:
        predictions = seq2seq(inp, targ, encoding_hidden)
        for t, prediction in enumerate(predictions):
            loss += loss_function(targ[:, t + 1], prediction)
    # loss_function already averages over the batch, so the reported value is
    # normalised by the target length (axis 1), not by the batch axis again.
    batch_loss = loss / int(targ.shape[1])
    variables = seq2seq.encoder.trainable_variables + seq2seq.decoder.trainable_variables
    # Gradients are taken of the un-normalised loss, as before.
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return batch_loss
def test_step(inp, targ, encoding_hidden):
    """Evaluate one per-replica batch and record its loss in ``test_loss_object``.

    Args:
        inp: Source token ids.
        targ: Target token ids; position ``t + 1`` is the label for step ``t``.
        encoding_hidden: Initial encoder state.
    """
    loss = 0
    predictions = seq2seq(inp, targ, encoding_hidden)
    for t, prediction in enumerate(predictions):
        loss += loss_function(targ[:, t + 1], prediction)
    # Record the per-time-step average. The raw sum grows with the padded
    # target length, which differs from shard to shard, so it cannot be
    # compared across shards.
    test_loss_object.update_state(loss / int(targ.shape[1]))
@tf.function
def distribute_train_step(inp, targ, encoding_hidden):
    """Run ``train_step`` on every replica and sum the per-replica losses."""
    # experimental_run_v2 is the deprecated name of Strategy.run; prefer the
    # new name when this TensorFlow version has it, else fall back.
    run_on_replicas = getattr(strategy, "run", None) or strategy.experimental_run_v2
    batch_loss = run_on_replicas(train_step, args=(inp, targ, encoding_hidden,))
    return strategy.reduce(tf.distribute.ReduceOp.SUM, batch_loss, axis=None)
@tf.function
def distribute_test_step(inp, targ, encoding_hidden):
    """Run ``test_step`` on every replica and return the per-replica result."""
    # experimental_run_v2 is the deprecated name of Strategy.run; prefer the
    # new name when this TensorFlow version has it, else fall back.
    run_on_replicas = getattr(strategy, "run", None) or strategy.experimental_run_v2
    return run_on_replicas(test_step, args=(inp, targ, encoding_hidden,))
# Optimizer, loss, metric and checkpoint are created under the strategy scope
# so that any variables they own are placed by the strategy.
with strategy.scope():
    optimizer = tf.keras.optimizers.Adam()
    # from_logits=True: the predictions are raw logits, not softmax output.
    # reduction="none" keeps one loss per example so that loss_function can
    # mask padding before reducing.
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction="none")
    test_loss_object = tf.keras.metrics.Mean(name="test_loss")
    checkpoint_dir = './training_checkpoints'
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
    checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                     seq2seq=seq2seq)
    # latest_checkpoint returns None when nothing has been saved yet, and
    # restore(None) does not raise, so the absence of a checkpoint has to be
    # tested explicitly for the message to be printed.
    latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
    if latest_checkpoint is None:
        print("no latest model")
    else:
        try:
            checkpoint.restore(latest_checkpoint)
        except Exception as restore_error:
            # Restoring stays best-effort (training then starts from
            # scratch), but the reason is reported instead of being hidden.
            print("no latest model")
            print("checkpoint restore failed: {}".format(restore_error))
# Train shard by shard: each shard is encoded, padded, split 80/20 and then
# trained for a fixed number of epochs before the next shard is loaded.
with strategy.scope():
    for i in range(0, 33):
        en_file = "training-giga-fren/data/en_" + str(i) + ".txt"
        fr_file = "training-giga-fren/data/fr_" + str(i) + ".txt"
        en_data = parse_data(en_file)
        fr_data = parse_data(fr_file)
        # NOTE(review): without maxlen, pad_sequences pads every row to the
        # longest sentence of the shard, so one very long sentence inflates
        # the whole array and the number of decoding steps -- consider a cap.
        input_tensor = input_tokenizer.texts_to_sequences(en_data)
        input_tensor = tf.keras.preprocessing.sequence.pad_sequences(input_tensor, padding="post")
        output_tensor = output_tokenizer.texts_to_sequences(fr_data)
        output_tensor = tf.keras.preprocessing.sequence.pad_sequences(output_tensor, padding="post")
        # The datasets are repeated once; the epoch loop below re-iterates them.
        epochs = 1
        input_train, input_test, output_train, output_test = train_test_split(
            input_tensor, output_tensor, test_size=0.2)
        train_dataset = make_dataset(input_train, output_train, batch_size, epochs, True)
        test_dataset = make_dataset(input_test, output_test, batch_size, epochs, False)
        dist_train_dataset = strategy.experimental_distribute_dataset(train_dataset)
        dist_test_dataset = strategy.experimental_distribute_dataset(test_dataset)
        epochs = 50
        for epoch in range(epochs):
            encoder_hidden = seq2seq.encoder.initialize_hidden_state()
            total_loss = 0
            num_batch = 0
            for (batch, (inp, targ)) in enumerate(dist_train_dataset):
                batch_loss = distribute_train_step(inp, targ, encoder_hidden)
                total_loss += batch_loss
                num_batch += 1
                if batch % 100 == 0:
                    print("Epoch {} Batch {} Loss {:.4f}".format(epoch + 1, batch, batch_loss.numpy()))
            for inp, targ in dist_test_dataset:
                distribute_test_step(inp, targ, encoder_hidden)
            # A shard too small for one full batch leaves total_loss as the
            # int 0 (no .numpy()) and num_batch as 0; report NaN instead of
            # raising.
            if num_batch:
                train_loss = total_loss.numpy() / num_batch
            else:
                train_loss = float("nan")
            print("Epoch {} Train Loss {:.4f} Test Loss {:.4f}".format(epoch + 1, train_loss,
                                                                        test_loss_object.result()))
            test_loss_object.reset_states()
            # Save a checkpoint every second epoch.
            if (epoch + 1) % 2 == 0:
                checkpoint.save(file_prefix=checkpoint_prefix)
def evaluate(sentence):
    """Greedily translate one sentence with the trained model.

    Args:
        sentence: Raw source sentence; surrounding whitespace (for example a
            trailing newline) is ignored.

    Returns:
        ``(result, sentence, attention_matrix)``: the translation with a
        space after every word, the preprocessed source sentence, and a
        ``(max_output, max_input)`` array holding the attention weights of
        each decoding step.

    Raises:
        KeyError: If ``"<start>"`` is not in the output vocabulary.
    """
    attention_matrix = np.zeros((max_output, max_input))
    # Strip first so a trailing newline does not end up glued to a token.
    sentence = preprocess_sentence(sentence.strip())
    # Encode through the tokenizer rather than indexing word_index directly:
    # the tokenizer applies the same normalisation it used when fitted and
    # skips unknown words instead of raising KeyError.
    inputs = input_tokenizer.texts_to_sequences([sentence])
    inputs = tf.keras.preprocessing.sequence.pad_sequences(inputs, maxlen=max_input,
                                                           padding="post")
    inputs = tf.convert_to_tensor(inputs)
    encoding_hidden = tf.zeros((1, units))
    encoding_outputs, encoding_hidden = seq2seq.encoder(inputs, encoding_hidden)
    decoding_hidden = encoding_hidden
    # NOTE(review): this lookup needs "<start>" in the output vocabulary, but
    # parse_data does not add the markers to the training text -- confirm the
    # data files already contain them.
    decoding_input = tf.expand_dims([output_tokenizer.word_index["<start>"]], 0)
    translated_words = []
    for t in range(max_output):
        # attention_weights.shape: (batch_size, input_length, 1)
        predictions, decoding_hidden, attention_weights = seq2seq.decoder(
            decoding_input, decoding_hidden, encoding_outputs
        )
        attention_matrix[t] = tf.reshape(attention_weights, (-1,)).numpy()
        # predictions.shape: (batch_size, vocab_size)
        predictions_id = tf.argmax(predictions[0]).numpy()
        # Id 0 is the padding value and has no entry in index_word; treat it
        # like "<end>" instead of raising KeyError.
        predicted_word = output_tokenizer.index_word.get(predictions_id)
        if predicted_word is None or predicted_word == "<end>":
            break
        translated_words.append(predicted_word)
        # Feed the predicted token back in as the next decoder input.
        decoding_input = tf.expand_dims([predictions_id], 0)
    result = "".join(word + " " for word in translated_words)
    return result, sentence, attention_matrix
def translation(input):
    """Translate *input* and print the preprocessed source and the result."""
    translated, processed_sentence, _ = evaluate(input)
    print("input:", processed_sentence)
    print("translation:", translated)
# Smoke test: translate the first 100 sentences of one English shard.
# The file is opened as UTF-8, matching parse_data; without an explicit
# encoding the platform default would be used.
with open(r"training-giga-fren/data/en_1.txt", encoding="utf8") as f_test:
    sentence = f_test.readlines()
cnt = 0
for test_sentence in sentence[:100]:
    translation(test_sentence)
    cnt += 1
В этом коде мне нужно обработать 33 англо-французских набора данных (каждый набор содержит 50 000 предложений). Из-за большого количества предложений я попытался использовать графические процессоры для ускорения вычислений, но обнаружил, что программа работает очень медленно, а процесс в итоге был убит системой (при этом я не заметил, чтобы использование оперативной памяти или памяти GPU выходило за допустимые пределы; см. использование памяти GPU ниже).
В этих наборах данных:
max_input 3434
max_output 3540
input_vocab_size 171871
output_vocab_size 201755
Моя конфигурация системы:
Ubuntu 16.04, CUDA 10.1
6 × Intel(R) Core(TM) i5-9600K CPU @ 3,70 ГГц
GeForce RTX 2080 8 ГБ (две карты)
16 ГБ оперативной памяти
Мои вопросы:
1. Память графического процессора занята, но его загрузка всегда равна 0. Как повысить загрузку графического процессора?
2. Как я могу ускорить мою программу?
3. Почему процесс был убит?