Справочная информация
Я создаю Google Colabs для каждого выступления, которое мне показалось интересным на Саммите Tensorflow 2020. Как примечание, я использую Tensorflow 2.1.
Я столкнулся с проблемой при попытке реализовать разговор 'Learning To Read With Tensorflow'
.
Все хорошо, пока мы не дойдем до определения класса EncoderDecoder
. Когда я реализую метод подгонки в моем пользовательском подклассе Model
, я получаю ошибку, которая будет подробно описана ниже.
Последняя существенная ошибка: AttributeError: 'NoneType' object has no attribute 'dtype'
.
Однако я полагаю, что это связано с проблемой в коде области действия GradientTape
и / или проблемами с определением Decoder Layers
(включая Attention Layers
)
Основной код
# Not normally defined here... but doing so for clarity
MAX_VOCAB_SIZE = 5000
WINDOW_LENGTH = 11
class EncoderDecoder(tf.keras.Model):
def __init__(self,
max_features=MAX_VOCAB_SIZE,
output_seq_len=WINDOW_LENGTH-1,
embedding_dims=200,
rnn_units=512):
super().__init__()
self.max_features = max_features
self.output_seq_len = output_seq_len
self.embedding_dims = embedding_dims
self.rnn_units = rnn_units
self.vectorize_layer = \
tf.keras.layers.experimental.preprocessing.TextVectorization(
max_tokens=self.max_features,
standardize='lower_and_strip_punctuation',
split='whitespace',
ngrams=None,
output_mode='int',
output_sequence_length=self.output_seq_len,
pad_to_max_tokens=True)
# --- <ENCODER STUFF> ---
# Embedding
self.encoder_embedding = \
tf.keras.layers.Embedding(input_dim=self.max_features+1,
output_dim=self.embedding_dims)
# ENCODER
self.lstm_layer = \
tf.keras.layers.LSTM(units=self.rnn_units,
return_state=True)
# --- </ENCODER STUFF> ---
# --- <DECODER STUFF> ---
# Embedding
self.decoder_embedding = \
tf.keras.layers.Embedding(input_dim=self.max_features+1,
output_dim=self.embedding_dims)
# ---------------- MAYBE NOT NECESSARY ----------------
# Sampler (for use during training)
# This was not shown during the talk but it is pretty obvious
sampler = tfa.seq2seq.sampler.TrainingSampler()
# This was not shown during the talk but is required...
# This is my best guess
decoder_cell = tf.keras.layers.LSTMCell(units=self.rnn_units)
# ---------------- MAYBE NOT NECESSARY ----------------
# Output Layer For Decoder
self.projection_layer = \
tf.keras.layers.Dense(self.max_features)
# DECODER
self.decoder = \
tfa.seq2seq.BasicDecoder(cell=decoder_cell,
sampler=sampler,
output_layer=self.projection_layer)
# --- </DECODER STUFF> ---
# --- <ATTN STUFF> ---
# Basic dense attention layer to connect Encoder & Decoder
self.attention = tf.keras.layers.Attention()
# --- </ATTN STUFF> ---
def train_step(self, data):
""" Overwrite built-in train_step method
Args:
data (tuple): The example (ten `words`), and the label (one `word`)
Returns:
Metric results for all passed metrics
"""
# Split data into example (x) and label (y)
x, y = data[0], data[1]
# Vectorize the example words (x)
x = self.vectorize_layer(x)
# Vectorize the labels
# This will by default pad the output to 10 ... but we only need the
# first entry (the true label not the useless padding)
y = self.vectorize_layer(y)[:, 0]
# Convert our label into a one-hot encoding based on the max number of
# features that we will be using for our model
y_one_hot = tf.one_hot(y, self.max_features)
# Everything within GradientTape is recorded
# for later automatic differentiation
with tf.GradientTape() as tape:
# --- <ENCODER STUFF> ---
# Transform the example utilizing the encoder embedding
inputs = self.encoder_embedding(x)
# Get the encoder outputs and state by
# utilizing the encoder (lstm_layer)
# - encoder_outputs : [max_time, batch_size, num_units]
# - encoder_state : [state_h, state_c]
# * state_h --- The Hidden State
# * state_c --- The Cell State
encoder_outputs, state_h, state_c = self.lstm_layer(inputs)
# --- </ENCODER STUFF> ---
# --- <ATTN STUFF> ---
# Pass the encoder outputs and hidden state allowing us
# to track the intermediate state coming out of the encoder layers
attn_output = self.attention([encoder_outputs, state_h])
attn_output = tf.expand_dims(attn_output, axis=1)
# --- </ATTN STUFF> ---
# --- <DECODER STUFF> ---
# ??? Create an empty embedding ???
targets = self.decoder_embedding(tf.zeros_like(y))
# Concat the output of the attention layer to the last axis
# of the empty targets embedding
concat_output = tf.concat([targets, attn_output], axis=-1)
# Predict the targets using the state from the encoder
outputs, _, _ = \
self.decoder(concat_output, initial_state=[state_h, state_c])
# --- </DECODER STUFF> ---
# Automatically differeniate utilizing the loss and trainable variables
gradients = tape.gradient(loss, trainable_variables)
# Collect the outputs so that they can be optimized
self.optimizer.apply_gradients(zip(gradients, trainable_variables))
# Update the metric state prior to return
self.compiled_metrics.update_state(y_one_hot, y_pred)
return {m.name: m.result() for m in self.metrics}
model = EncoderDecoder()
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
optimizer="adam",
metrics=["accuracy"])
model.vectorize_layer.adapt(lines.batch(256))
# ERROR OCCURS ON THIS LINE
model.fit(data.batch(256),
epochs=45,
callbacks=[tf.keras.callbacks.ModelCheckpoint(filepath='text_gen')])
Подробное сообщение об ошибке
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-40-779906f7f617> in <module>()
1 model.fit(data.batch(256),
2 epochs=45,
----> 3 callbacks=[tf.keras.callbacks.ModelCheckpoint(filepath='text_gen')])
8 frames
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
233 max_queue_size=max_queue_size,
234 workers=workers,
--> 235 use_multiprocessing=use_multiprocessing)
236
237 total_samples = _get_total_number_of_samples(training_data_adapter)
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/training_v2.py in _process_training_inputs(model, x, y, batch_size, epochs, sample_weights, class_weights, steps_per_epoch, validation_split, validation_data, validation_steps, shuffle, distribution_strategy, max_queue_size, workers, use_multiprocessing)
591 max_queue_size=max_queue_size,
592 workers=workers,
--> 593 use_multiprocessing=use_multiprocessing)
594 val_adapter = None
595 if validation_data:
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/training_v2.py in _process_inputs(model, mode, x, y, batch_size, epochs, sample_weights, class_weights, shuffle, steps, distribution_strategy, max_queue_size, workers, use_multiprocessing)
704 max_queue_size=max_queue_size,
705 workers=workers,
--> 706 use_multiprocessing=use_multiprocessing)
707
708 return adapter
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/data_adapter.py in __init__(self, x, y, sample_weights, standardize_function, **kwargs)
700
701 if standardize_function is not None:
--> 702 x = standardize_function(x)
703
704 # Note that the dataset instance is immutable, its fine to reusing the user
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/training_v2.py in standardize_function(dataset)
658 model.sample_weight_mode = getattr(model, 'sample_weight_mode', None)
659
--> 660 standardize(dataset, extract_tensors_from_dataset=False)
661
662 # Then we map using only the tensor standardization portion.
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, batch_size, check_steps, steps_name, steps, validation_split, shuffle, extract_tensors_from_dataset)
2358 is_compile_called = False
2359 if not self._is_compiled and self.optimizer:
-> 2360 self._compile_from_inputs(all_inputs, y_input, x, y)
2361 is_compile_called = True
2362
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/training.py in _compile_from_inputs(self, all_inputs, target, orig_inputs, orig_target)
2578 if training_utils.has_tensors(target):
2579 target = training_utils.cast_if_floating_dtype_and_mismatch(
-> 2580 target, self.outputs)
2581 training_utils.validate_input_types(target, orig_target,
2582 allow_dict=False, field_name='target')
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/training_utils.py in cast_if_floating_dtype_and_mismatch(targets, outputs)
1334 if tensor_util.is_tensor(targets):
1335 # There is one target, so output[0] should be the only output.
-> 1336 return cast_single_tensor(targets, dtype=outputs[0].dtype)
1337 new_targets = []
1338 for target, out in zip(targets, outputs):
AttributeError: 'NoneType' object has no attribute 'dtype'
Как получить data
& lines
Переменные, если требуется копировать
Получить данные
>>> wget http://www.thespermwhale.com/jaseweston/babi/CBTest.tgz
>>> tar zxvf CBTest.tgz
>>> rm -rf CBTest.tgz
Предварительная обработка данных
# Load data from a dataset comprising lines
# from one or more text files.
lines = tf.data.TextLineDataset("<path-to>/cbt_train.txt")
# Filter Out Title Lines First
# This simple fn not included in this stackoverflow code
lines = lines.filter(lambda x: not is_title(x))
# Then We Remove All Punctuation
# This simple fn not included in this stackoverflow code
lines = lines.map(lambda x: remove_punc(x))
# Then We Remove All Extra Spaces Created By The Previous FN
# This simple fn not included in this stackoverflow code
lines = lines.map(lambda x: remove_extra_spaces(x))
# Then We Turn All The Uppercase Letters into Lowercase Letters
# This simple fn not included in this stackoverflow code
lines = lines.map(lambda x: make_lower(x))
# Get words from lines
words = lines.map(tf.strings.split)
words = words.unbatch()
# Get wordsets
wordsets = words.batch(11)
# get_example_label is a simple fn to split wordsets into examples and labels
# First ten words are the example and last word is the label
data = wordsets.map(get_example_label)
# Shuffle
data = data.shuffle(1024)
Ссылки
Заранее спасибо !!