My recurrent batch-normalization LSTM cell looks like this:
import tensorflow as tf
from tensorflow.contrib.rnn import RNNCell, LSTMStateTuple

# orthogonal_initializer and bn_lstm_identity_initializer are helper
# functions defined elsewhere in recurrent_batch.py; batch_norm is shown
# further below.

class BNLSTMCell(RNNCell):

    def __init__(
            self,
            num_units,
            is_training=True,
            use_peepholes=False,
            cell_clip=None,
            initializer=None,
            num_proj=None,
            proj_clip=None,
            forget_bias=1.0,
            state_is_tuple=True,
            activation=tf.tanh,
            reuse=None):
        super(BNLSTMCell, self).__init__(_reuse=reuse)
        if not state_is_tuple:
            tf.logging.log_first_n(
                tf.logging.WARN,
                '%s: Using a concatenated state is slower and will soon be '
                'deprecated. Use state_is_tuple=True.', 1, self)

        self.num_units = num_units
        self.is_training = is_training
        self.use_peepholes = use_peepholes
        self.cell_clip = cell_clip
        self.num_proj = num_proj
        self.proj_clip = proj_clip
        self.initializer = initializer
        self.forget_bias = forget_bias
        self.state_is_tuple = state_is_tuple
        self.activation = activation

        if num_proj:
            self._state_size = (
                LSTMStateTuple(num_units, num_proj)
                if state_is_tuple else num_units + num_proj)
            self._output_size = num_proj
        else:
            self._state_size = (
                LSTMStateTuple(num_units, num_units)
                if state_is_tuple else 2 * num_units)
            self._output_size = num_units

    @property
    def state_size(self):
        return self._state_size

    @property
    def output_size(self):
        return self._output_size

    def call(self, inputs, state):
        num_proj = self.num_units if self.num_proj is None else self.num_proj

        if self.state_is_tuple:
            (c_prev, h_prev) = state
        else:
            c_prev = tf.slice(state, [0, 0], [-1, self.num_units])
            h_prev = tf.slice(state, [0, self.num_units], [-1, num_proj])

        dtype = inputs.dtype
        input_size = inputs.get_shape().with_rank(2)[1]
        if input_size.value is None:
            raise ValueError(
                'Could not infer input size from inputs.get_shape()[-1]')

        scope = tf.get_variable_scope()
        with tf.variable_scope(scope or type(self).__name__):
            W_xh = tf.get_variable(
                'input_kernel', [input_size, 4 * self.num_units],
                initializer=orthogonal_initializer())
            W_hh = tf.get_variable(
                'state_kernel', [num_proj, 4 * self.num_units],
                initializer=bn_lstm_identity_initializer(0.95))

            xh = tf.matmul(inputs, W_xh)
            hh = tf.matmul(h_prev, W_hh)

            # batch-normalize the input and recurrent contributions separately
            bn_xh = batch_norm(xh, 'input', self.is_training)
            bn_hh = batch_norm(hh, 'state', self.is_training)

            bias = tf.get_variable('bias', [4 * self.num_units])

            # i: input gate, j: new input, f: forget gate, o: output gate
            lstm_matrix = tf.nn.bias_add(tf.add(bn_xh, bn_hh), bias)
            (i, j, f, o) = tf.split(
                value=lstm_matrix, num_or_size_splits=4, axis=1)

            # diagonal (peephole) connections
            if self.use_peepholes:
                w_f_diag = tf.get_variable(
                    'W_F_diag', shape=[self.num_units], dtype=dtype)
                w_i_diag = tf.get_variable(
                    'W_I_diag', shape=[self.num_units], dtype=dtype)
                w_o_diag = tf.get_variable(
                    'W_O_diag', shape=[self.num_units], dtype=dtype)

            if self.use_peepholes:
                c = (c_prev * tf.sigmoid(f + self.forget_bias
                                         + w_f_diag * c_prev)
                     + tf.sigmoid(i + w_i_diag * c_prev) * self.activation(j))
            else:
                c = (c_prev * tf.sigmoid(f + self.forget_bias)
                     + tf.sigmoid(i) * self.activation(j))

            if self.cell_clip is not None:
                c = tf.clip_by_value(c, -self.cell_clip, self.cell_clip)

            # batch-normalize the cell state before the output nonlinearity
            bn_c = batch_norm(c, 'cell', self.is_training)

            if self.use_peepholes:
                h = tf.sigmoid(o + w_o_diag * c) * self.activation(bn_c)
            else:
                h = tf.sigmoid(o) * self.activation(bn_c)

            if self.num_proj is not None:
                w_proj = tf.get_variable(
                    'projection/kernel', [self.num_units, num_proj],
                    dtype=dtype)
                h = tf.matmul(h, w_proj)
                if self.proj_clip is not None:
                    h = tf.clip_by_value(h, -self.proj_clip, self.proj_clip)

        new_state = (LSTMStateTuple(c, h) if self.state_is_tuple
                     else tf.concat(values=[c, h], axis=1))
        return (h, new_state)
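For context, the cell is driven through a bidirectional dynamic RNN (the layer1/BLSTM/bidirectional_rnn/fw/fw/while names in the error trace below come from there). A simplified sketch of the wiring, with hypothetical sizes and placeholders (the actual Listener code is more involved):

# Simplified sketch (hypothetical shapes and num_units).
# bidirectional_dynamic_rnn builds a tf.while_loop internally, so
# everything BNLSTMCell.call creates, including the batch_norm
# moving-average updates, lives inside that loop.
inputs = tf.placeholder(tf.float32, [None, None, 40])  # [batch, time, feat]
sequence_lengths = tf.placeholder(tf.int32, [None])
fw_cell = BNLSTMCell(num_units=128, is_training=True)
bw_cell = BNLSTMCell(num_units=128, is_training=True)
outputs, states = tf.nn.bidirectional_dynamic_rnn(
    fw_cell, bw_cell, inputs,
    sequence_length=sequence_lengths, dtype=tf.float32)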
My batch_norm function looks like this:
def batch_norm(x, name_scope, is_training):
    with tf.variable_scope(name_scope):
        return tf.layers.batch_normalization(
            inputs=x, training=is_training, fused=True)
And during training I have a function in which I collect the update_ops:
def _update(self, loss, learning_rate, cluster):
    '''
    create the op to update the model

    args:
        loss: the loss to minimize
        learning_rate: the learning rate
        cluster: the tf cluster

    returns: the update op
    '''

    # create the optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate)

    # create an optimizer that aggregates gradients
    if int(self.conf['numbatches_to_aggregate']) > 0:
        if 'local' in cluster.as_dict():
            num_workers = 1
        else:
            num_workers = len(cluster.as_dict()['worker'])
        optimizer = tf.train.SyncReplicasOptimizer(
            opt=optimizer,
            replicas_to_aggregate=int(
                self.conf['numbatches_to_aggregate']),
            total_num_replicas=num_workers)

    tf.summary.scalar('training_loss', loss,
                      collections=['training_summaries'])

    # get the list of trainable variables
    trainable = tf.trainable_variables()

    # remove the variables in the 'untrainable' collection from the
    # trainable variables
    untrainable = tf.get_collection('untrainable')
    trainable = [var for var in trainable if var not in untrainable]

    # compute the gradients
    grads_and_vars = optimizer.compute_gradients(
        loss=loss,
        var_list=trainable)

    with tf.variable_scope('clip'):
        # clip the gradients
        grads_and_vars = [(tf.clip_by_value(grad, -1., 1.), var)
                          for grad, var in grads_and_vars]

    # operation to apply the gradients
    apply_gradients_op = optimizer.apply_gradients(
        grads_and_vars=grads_and_vars,
        name='apply_gradients')

    # all remaining operations in the UPDATE_OPS GraphKeys collection
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    print("update_ops {}".format(update_ops))
    print("################")

    # create an operation to apply the gradients, update the batch_loss
    # and run all other update ops
    update_op = tf.group(
        *([apply_gradients_op] + update_ops),
        name='update')

    return update_op
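The printed update_ops list is long; a quick way to see which of the collected ops were created inside the RNN's while loop is to filter them by name (a diagnostic sketch; matching on '/while/' is just a heuristic based on the op names in the trace below):

# Diagnostic sketch (heuristic): ops created inside a tf.while_loop carry
# '/while/' in their name, as in the error trace below.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
in_loop = [op.name for op in update_ops if '/while/' in op.name]
in_root = [op.name for op in update_ops if '/while/' not in op.name]
print('created inside a while loop: {}'.format(in_loop))
print('created in the root frame: {}'.format(in_root))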
The resulting error message looks like this:
Файл "/home/ubuntu/workspace/reproduce/jobs/nabu/nabu/neuralnetworks/trainers/trainer.py"строка 769, в выходных данных поезда ['training_summaries']]) Файл "/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", строка 671, в run run_metadata = run_metadata) Файл "/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", строка 1156, в run run_metadata = run_metad)Файл "/home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py", строка 1255, в ходе выполнения поднимает файл six.reraise (* original_exc_info) "/главная / убунту / anaconda3 / envs / тензорflow_p27 / lib / python2.7 / site-packages / tenorflow / python / training / monitored_session.py ", строка 1240, в прогоне возвращает файл self._sess.run (* args, ** kwargs)" / home / ubuntu / anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/tensorflow/python/training/monitored_session.py ", строка 1312, в файле run_metadata = run_metadata)" / home / ubuntu / anaconda3 / envs / tenorsflow_p27 / lib /python2.7 / site-packages / tenorflow / python / training / monitored_session.py ", строка 1076, в прогоне return self._sess.run (* args, ** kwargs) Файл" / home / ubuntu / anaconda3 / envs / tenorsflow_p27/lib/python2.7/site-packages/tensorflow/python/client/session.py ", строка 929, в запуске run_metadata_ptr) Файл" /home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages / tenorflow / python / client / session.py ", строка 1152, в _run feed_dict_tensor, options, run_metadata) Файл" /home/ubuntu/anaconda3/envs/tensorflow_p27/lib/python2.7/site-packages/tensorflow/python/client/session.py ", строка 1328, в _do_run run_metadata) Файл" / home / ubuntu / anaconda3 / envs / tenorflow_p27 / lib / python2.7 / site-packages / tenorflow / python / client / session.py ", строка 1348, в _do_call поднять тип (e) (node_def, op, message) тензорный поток.python.framework.errors_impl.InvalidArgumentError: узел train / update (определенный в /home/ubuntu/workspace/reproduce/jobs/nabu/nabu/neuralnetworks/trainers/trainer.py:578) имеет входные данные из разных фреймов.Поезд узла ввода / Listener / features / layer1 / BLSTM / bidirectional_rnn / fw / fw / while / fw / bnlstm_cell / bnlstm_cell / state / batch_normalization / AssignMovingAvg (определяется в / home / ubuntu / рабочей области / размножаться / jobs / nabu / nabuneuralnetworks / components / recurrent_batch.py: 61) находится в кадре 'train / Listener / features / layer1 / BLSTM / bidirectional_rnn / fw / fw / while / while_context'.Узел ввода train / apply_gradients / Assign (определенный в /home/ubuntu/workspace/reproduce/jobs/nabu/nabu/neuralnetworks/trainers/trainer.py:569) находится в кадре ''.