- I am trying to re-implement the handwriting generation network from Alex Graves' paper on this topic. I am using data from the IAM On-Line Handwriting Database, which contains recordings of pen positions on a whiteboard, with 2 values recorded per time step (the x and y coordinates).
- The time steps are grouped into strokes (series of points during which the pen touches the board).
- The strokes are assembled into lines of text.
- Each line of text is treated as one separate batch.
- In addition, I have adapted a Mixture Density Network (MDN) layer implementation for my own purposes.
(This is where my assumptions begin.)
- Since the sequence lengths vary, each sequence has to be pre-padded.
- I think the padding values should then be masked out with tf.keras.layers.Masking().
- However, as soon as I enable that layer, the code crashes while compiling the model with my custom loss function, raising the exception:
ValueError: weights can not be broadcast to values. values.rank=0. weights.rank=2. values.shape=(). weights.shape=(8, 1939).
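To make the padding assumption concrete, here is a minimal sketch of what I mean (illustrative only: the random data, the zero padding value, and the use of pad_sequences are stand-ins, not my real preprocessing):

import numpy as np
import tensorflow as tf

# Three "text lines" of different lengths, 3 values per time step.
lines = [np.random.rand(5, 3), np.random.rand(8, 3), np.random.rand(2, 3)]

# Pre-pad every sequence with zeros up to the longest length.
padded = tf.keras.preprocessing.sequence.pad_sequences(
    lines, padding='pre', dtype='float32', value=0.0)
print(padded.shape)  # (3, 8, 3)

# Masking() flags the all-zero time steps so downstream layers can skip them.
masking = tf.keras.layers.Masking(mask_value=0.0)
mask = masking.compute_mask(tf.constant(padded))
print(mask)  # boolean tensor of shape (3, 8), False on the padded steps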
Full code:
MDN implementation
# -*- encoding: utf-8 -*-
# Author: Danil Kovalenko

from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
from tensorflow_probability import distributions as tfd


def elu_plus_one_plus_epsilon(x):
    """ELU activation with a very small addition to help prevent
    NaN in loss."""
    return keras.backend.elu(x) + 1 + keras.backend.epsilon()


def biased_softmax(bias=0):
    def activation(x):
        return keras.activations.softmax(x * (1. + bias))
    return activation


def biased_exp(bias=0):
    def activation(x):
        return tf.exp(x - bias)
    return activation


class MDN(layers.Layer):
    """A Mixture Density Network layer for Keras.

    This layer has a few tricks to avoid NaNs in the loss function when
    training:
    - the activation for variances is ELU + 1 + 1e-8 (to avoid very small
      values);
    - the mixture weights (pi) are trained as logits, not in the softmax
      space.

    A loss function needs to be constructed with the same output dimension
    and number of mixtures. A sampling function is also provided to sample
    from the distribution parametrized by the MDN outputs.
    """

    def __init__(self, output_dimension, num_mixtures, bias=0, **kwargs):
        self.output_dim = output_dimension
        self.num_mix = num_mixtures
        self.bias = bias
        with tf.name_scope('MDN'):
            # end-of-stroke probability
            self.mdn_e = layers.Dense(1, name='mdn_e', activation='sigmoid')
            # mixing values, logits
            self.mdn_pi = layers.Dense(self.num_mix, name='mdn_pi',
                                       activation=biased_softmax(bias))
            # means
            self.mdn_mu = layers.Dense(self.output_dim * self.num_mix,
                                       name='mdn_mu1')
            # standard deviations
            self.mdn_std = layers.Dense(self.output_dim * self.num_mix,
                                        name='mdn_std1',
                                        activation=elu_plus_one_plus_epsilon)
            # correlation
            # self.mdn_rho = layers.Dense(self.num_mix, name='mdn_rho',
            #                             activation='tanh')
            self.layers = [self.mdn_e, self.mdn_pi, self.mdn_mu,
                           self.mdn_std,
                           # self.mdn_rho,
                           ]
        super(MDN, self).__init__(**kwargs)

    def build(self, input_shape):
        with tf.name_scope('layers'):
            for layer in self.layers:
                layer.build(input_shape)
        super(MDN, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        # Pass the incoming mask through unchanged; tf.print is debug output.
        tf.print(mask)
        return mask

    def call(self, x, mask=None):
        # Debug output: which mask (if any) reaches this layer.
        tf.print(mask)
        with tf.name_scope('MDN'):
            mdn_out = layers.concatenate([l(x) for l in self.layers],
                                         name='mdn_outputs')
        return mdn_out

    def get_config(self):
        config = {
            "output_dimension": self.output_dim,
            "num_mixtures": self.num_mix,
            "bias": self.bias,
        }
        base_config = super(MDN, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))


def get_mixture_loss_func(output_dim, num_mixes, eps=1e-8):
    """Construct a loss function for the MDN layer, parametrised by the
    number of mixtures."""
    def mdn_loss_func(y_true, y_pred):
        # Split the predictions into parameters: 1 for end-of-stroke,
        # `num_mixes` for each of the others.
        # y_true = tf.reshape(tensor=y_true, shape=y_pred.shape)
        y_pred = tf.reshape(y_pred,
                            [-1, (2 * num_mixes * output_dim + 1) + num_mixes],
                            name='reshape_ypreds')
        y_true = tf.reshape(y_true,
                            [-1, output_dim + 1],
                            name='reshape_ytrue')
        out_e, out_pi, out_mus, out_stds = tf.split(
            y_pred,
            num_or_size_splits=[1,
                                num_mixes,
                                num_mixes * output_dim,
                                num_mixes * output_dim],
            name='mdn_coef_split',
            axis=-1
        )
        cat = tfd.Categorical(logits=out_pi)
        components_splits = [output_dim] * num_mixes
        mus = tf.split(out_mus, num_or_size_splits=components_splits, axis=1)
        stds = tf.split(out_stds, num_or_size_splits=components_splits, axis=1)
        components = [tfd.MultivariateNormalDiag(loc=mu_i, scale_diag=std_i)
                      for mu_i, std_i in zip(mus, stds)]
        mix = tfd.Mixture(cat=cat, components=components)
        xs, ys, es = tf.unstack(y_true, axis=-1)
        X = tf.stack((xs, ys), axis=-1)
        stroke = tfd.Bernoulli(logits=out_e)
        loss1 = tf.negative(mix.log_prob(X))
        loss2 = tf.negative(stroke.log_prob(es))
        loss = tf.add(loss1, loss2)
        loss = tf.reduce_mean(loss)
        return loss

    # Actually return the loss function
    with tf.name_scope('MDN'):
        return mdn_loss_func


def get_mixture_mse_accuracy(output_dim, num_mixes):
    """Construct an MSE accuracy function for the MDN layer that draws one
    sample and compares it to the true value."""
    # Construct a metric function with the right number of mixtures and outputs
    def mse_func(y_true, y_pred):
        # Reshape inputs in case this is used in a TimeDistributed layer
        y_pred = tf.reshape(y_pred,
                            [-1, (2 * num_mixes * output_dim + 1) + num_mixes],
                            name='reshape_ypreds')
        y_true = tf.reshape(y_true,
                            [-1, output_dim + 1],
                            name='reshape_ytrue')
        out_e, out_pi, out_mus, out_stds = tf.split(
            y_pred,
            num_or_size_splits=[1,
                                num_mixes,
                                num_mixes * output_dim,
                                num_mixes * output_dim],
            name='mdn_coef_split',
            axis=-1
        )
        cat = tfd.Categorical(logits=out_pi)
        components_splits = [output_dim] * num_mixes
        mus = tf.split(out_mus, num_or_size_splits=components_splits, axis=1)
        stds = tf.split(out_stds, num_or_size_splits=components_splits, axis=1)
        components = [tfd.MultivariateNormalDiag(loc=mu_i, scale_diag=std_i)
                      for mu_i, std_i in zip(mus, stds)]
        mix = tfd.Mixture(cat=cat, components=components)
        stroke = tfd.Bernoulli(logits=out_e)
        pos_samp = mix.sample()
        stroke_samp = tf.cast(stroke.sample(), tf.float32)
        samp = tf.concat((pos_samp, stroke_samp), axis=-1)
        mse = tf.reduce_mean(tf.square(samp - y_true), axis=-1)
        # TODO: temperature adjustment for the sampling function.
        return mse

    # Actually return the metric function
    with tf.name_scope('MDNLayer'):
        return mse_func
The network
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Author: Danil Kovalenko

import h5py
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, LSTM, Masking, add, Input, concatenate
from tensorflow.keras.models import Sequential, Model

from mdn import MDN, get_mixture_loss_func
from custom_mdn import (MDN as _MDN,
                        get_mixture_loss_func as _get_mixture_loss_func,
                        get_mixture_mse_accuracy)


def get_lstm(amt, params):
    return [LSTM(**params) for i in range(amt)]


def define_model2(N, batch_size, time_steps, vector_size, num_mixtures):
    """Stacked LSTMs + skip connections."""
    lstm_params = {'units': N,
                   'activation': 'tanh',
                   'return_sequences': True,
                   'batch_input_shape': (batch_size, time_steps, vector_size)}
    enter = Input(batch_shape=(batch_size, time_steps, vector_size))
    mask = Masking(mask_value=(0, 0, 0),
                   batch_input_shape=(batch_size, time_steps, vector_size)
                   )(enter)
    raw_lstm1, raw_lstm2, raw_lstm3 = get_lstm(3, lstm_params)
    # skip connection from the input into every LSTM level
    input_proxy = Dense(N)(mask)
    lstm1 = raw_lstm1(mask)
    lvl1_out = add([input_proxy, lstm1])
    lstm2 = raw_lstm2(lvl1_out)
    lvl2_out = add([input_proxy, lstm2])
    lstm3 = raw_lstm3(lvl2_out)
    lvl3_out = add([input_proxy, lstm3])
    # skip connections from every LSTM level to the output
    out_proxy = Dense(vector_size)
    lstm1_proxy = out_proxy(lstm1)
    lstm2_proxy = out_proxy(lstm2)
    lstm3_proxy = out_proxy(lstm3)
    out_dense = Dense(units=vector_size, activation='linear')(lvl3_out)
    out_proxy = add([out_dense, lstm1_proxy, lstm2_proxy, lstm3_proxy])
    out = _MDN(vector_size - 1, num_mixtures)(out_proxy)
    m = Model(inputs=enter, outputs=out)
    m.compile(optimizer='rmsprop',
              loss=_get_mixture_loss_func(vector_size - 1, num_mixtures),
              # metrics=[get_mixture_mse_accuracy(vector_size - 1, num_mixtures), ]
              )
    return m


if __name__ == '__main__':
    N = 10
    with h5py.File('../dataset.h5', 'r') as f:
        X = f['lines'][:]
    X = X[:200]
    batch_size = 8
    _, time_steps, vector_size = X.shape
    m = define_model2(N, batch_size, time_steps - 1, vector_size, 5)
    # print(m.summary())
    size = X.shape[0] - X.shape[0] % batch_size
    # shift by one step: predict the next point from the current one
    X_train = X[:size, :-1, :]
    Y_train = X[:size, 1:, :]
    X_train = tf.convert_to_tensor(X_train.astype(np.float64))
    Y_train = tf.convert_to_tensor(Y_train.astype(np.float64))
    m.fit(X_train, Y_train, batch_size=None, epochs=1)
    m.save('hwg_model3.h5')
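For reference, the array shapes I believe are involved (the 1940 time steps are inferred backwards from the (8, 1939) in the exception; the zeros are a stand-in for the real data):

import numpy as np

X = np.zeros((200, 1940, 3))                   # (lines, time steps, (x, y, eos))
batch_size = 8
size = X.shape[0] - X.shape[0] % batch_size    # 192, a multiple of 8
X_train, Y_train = X[:size, :-1, :], X[:size, 1:, :]
print(X_train.shape, Y_train.shape)            # (192, 1939, 3) for both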
The exception
Traceback (most recent call last):
File "/home/godspell/UserData/Scripts/studyprojects4/handwritings_gen/models/rnn_model.py", line 66, in <module>
m = define_model2(N, batch_size, time_steps - 1, vector_size, 5)
File "/home/godspell/UserData/Scripts/studyprojects4/handwritings_gen/models/rnn_model.py", line 53, in define_model2
loss=_get_mixture_loss_func(vector_size - 1, num_mixtures),
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py", line 457, in _method_wrapper
result = method(self, *args, **kwargs)
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 446, in compile
self._compile_weights_loss_and_weighted_metrics()
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py", line 457, in _method_wrapper
result = method(self, *args, **kwargs)
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 1592, in _compile_weights_loss_and_weighted_metrics
self.total_loss = self._prepare_total_loss(masks)
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 1656, in _prepare_total_loss
reduction=losses_utils.ReductionV2.NONE)
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/utils/losses_utils.py", line 107, in compute_weighted_loss
losses, sample_weight)
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/ops/losses/util.py", line 148, in scale_losses_by_sample_weight
sample_weight = weights_broadcast_ops.broadcast_weights(sample_weight, losses)
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/ops/weights_broadcast_ops.py", line 167, in broadcast_weights
with ops.control_dependencies((assert_broadcastable(weights, values),)):
File "/home/godspell/.local/lib/python3.7/site-packages/tensorflow_core/python/ops/weights_broadcast_ops.py", line 103, in assert_broadcastable
weights_rank_static, values.shape, weights.shape))
ValueError: weights can not be broadcast to values. values.rank=0. weights.rank=2. values.shape=(). weights.shape=(8, 1939).
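If I read the traceback correctly, weights.shape=(8, 1939) is exactly (batch_size, time_steps): Keras takes the mask propagated from the Masking() layer and tries to apply it to the loss as per-time-step sample weights, but my mdn_loss_func has already collapsed everything into a single scalar with tf.reduce_mean, hence values.rank=0 and the failed broadcast. Does a custom loss used together with Masking() have to return per-time-step values of shape (batch_size, time_steps) and leave the reduction to Keras, or is there another way to make the mask and the custom loss work together?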