I am trying to combine the solution from this GitHub repo (time-series forecasting with exogenous inputs) https://github.com/aaxwaz/Multivariate-Time-Series-forecast-using-seq2seq-in-TensorFlow with this one (count-data models using a custom negative binomial loss) https://github.com/gokceneraslan/neuralnet_countmodels. The goal is a model that forecasts a time series with exogenous inputs, but whose output is count data (integers), so I would like to train it with a negative binomial loss. During training the loss returns NaN. Can anyone help me modify the code so it does what I want? This is the modified code I am trying to run (my own guesses about where the NaN comes from are at the end, after the listing):

import os
import copy

import tensorflow as tf
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import rnn
from tensorflow.python.framework import dtypes

# input_dim, output_dim, hidden_dim, input_seq_len, output_seq_len, num_stacked_layers,
# learning_rate, lambda_l2_reg, GRADIENT_CLIPPING and generate_train_supervised_samples
# are defined earlier in my script and omitted here.


class NB(object):
    def __init__(self, theta=None, theta_init=[0.0],
                 scale_factor=1.0, scope='nbinom_loss/',
                 debug=False, **theta_kwargs):
        self.eps = 1e-10
        self.scale_factor = scale_factor
        self.debug = debug
        self.scope = scope

        with tf.name_scope(self.scope):
            # a variable may be given by the user or it can be created here
            if theta is None:
                theta = tf.Variable(theta_init, dtype=tf.float32,
                                    name='theta', **theta_kwargs)

            # keep a reference to the variable itself
            self.theta_variable = theta

            # to keep the dispersion always non-negative
            self.theta = tf.nn.softplus(theta)

    def loss(self, y_true, y_pred, reduce=True):
        scale_factor = self.scale_factor
        eps = self.eps

        with tf.name_scope(self.scope):
            y_true = tf.cast(y_true, tf.float32)
            y_pred = tf.cast(y_pred, tf.float32) * scale_factor
            theta = 1.0 / (self.theta + eps)

            # negative log-likelihood of the negative binomial, term by term
            t1 = -tf.lgamma(y_true + theta + eps)
            t2 = tf.lgamma(theta + eps)
            t3 = tf.lgamma(y_true + 1.0)
            t4 = -(theta * (tf.log(theta + eps)))
            t5 = -(y_true * (tf.log(y_pred + eps)))
            t6 = (theta + y_true) * tf.log(theta + y_pred + eps)

            if self.debug:
                tf.summary.histogram('t1', t1)
                tf.summary.histogram('t2', t2)
                tf.summary.histogram('t3', t3)
                tf.summary.histogram('t4', t4)
                tf.summary.histogram('t5', t5)
                tf.summary.histogram('t6', t6)

            final = t1 + t2 + t3 + t4 + t5 + t6

            if reduce:
                final = tf.reduce_mean(final)

        return final
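
# Note (my own observation, not from either repository): the terms t5 and t6 above take
# tf.log of y_pred, so the loss is only finite when the predicted mean is strictly positive.
# A minimal standalone sketch to see this (not wired into the model code, kept commented out):
#
#   with tf.Graph().as_default():
#       nb_check = NB()
#       ok = nb_check.loss(tf.constant([[3.0]]), tf.constant([[2.5]]))    # positive mean
#       bad = nb_check.loss(tf.constant([[3.0]]), tf.constant([[-2.5]]))  # negative mean
#       with tf.Session() as s:
#           s.run(tf.global_variables_initializer())
#           print(s.run([ok, bad]))  # I expect a finite value and nan
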
def build_graph(feed_previous=False):

    tf.reset_default_graph()

    global_step = tf.Variable(
        initial_value=0,
        name="global_step",
        trainable=False,
        collections=[tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES])

    weights = {
        'out': tf.get_variable('Weights_out',
                               shape=[hidden_dim, output_dim],
                               dtype=tf.float32,
                               initializer=tf.contrib.layers.xavier_initializer()),
        'out_dec_inp': tf.get_variable('Weights_out_dec',
                                       shape=[output_dim + 1, output_dim],
                                       dtype=tf.float32,
                                       initializer=tf.contrib.layers.xavier_initializer()),
    }
    biases = {
        'out': tf.get_variable('Biases_out',
                               shape=[output_dim],
                               dtype=tf.float32,
                               initializer=tf.zeros_initializer()),
        'out_dec_inp': tf.get_variable('Biases_out_dec',
                                       shape=[output_dim],
                                       dtype=tf.float32,
                                       initializer=tf.zeros_initializer()),
    }

    with tf.variable_scope('Seq2seq'):
        # Encoder: inputs
        enc_inp = [
            tf.placeholder(tf.float32, shape=(None, input_dim), name="inp_{}".format(t))
            for t in range(input_seq_len)
        ]

        # Decoder: target outputs
        target_seq = [
            tf.placeholder(tf.float32, shape=(None, output_dim), name="y_{}".format(t))
            for t in range(output_seq_len)
        ]

        # Extreme events bool vectors for the input seq
        #input_seq_extremes_bool = [
        #    tf.placeholder(tf.float32, shape=(None, 1), name="event_bool_{}".format(t))
        #    for t in range(input_seq_len)
        #]

        # Extreme events bool vectors for the output seq
        output_seq_extremes_bool = [
            tf.placeholder(tf.float32, shape=(None, 1), name="event_bool_{}".format(t))
            for t in range(output_seq_len)
        ]

        # Give a "GO" token to the decoder.
        # If dec_inp is fed into the decoder as input, this is 'guided' (teacher-forced) training;
        # otherwise only the first element is fed as decoder input and the rest is generated from
        # previous outputs, which is 'un-guided' decoding.
        dec_inp = [tf.zeros_like(target_seq[0], dtype=tf.float32, name="GO")] + target_seq[:-1]
        dec_inp = [tf.concat([b, d], 1) for b, d in zip(output_seq_extremes_bool, dec_inp)]
        #enc_inp = [tf.concat([b, e], 1) for b, e in zip(input_seq_extremes_bool, enc_inp_raw)]

        with tf.variable_scope('LSTMCell'):
            cells = []
            for i in range(num_stacked_layers):
                with tf.variable_scope('RNN_{}'.format(i)):
                    cells.append(tf.contrib.rnn.LSTMCell(hidden_dim))
            cell = tf.contrib.rnn.MultiRNNCell(cells)
        def _rnn_decoder(decoder_inputs,
                         initial_state,
                         cell,
                         loop_function=None,
                         scope=None):
            """RNN decoder for the sequence-to-sequence model.

            Args:
                decoder_inputs: A list of 2D Tensors [batch_size x input_size].
                initial_state: 2D Tensor with shape [batch_size x cell.state_size].
                cell: rnn_cell.RNNCell defining the cell function and size.
                loop_function: If not None, this function will be applied to the i-th output
                    in order to generate the i+1-st input, and decoder_inputs will be ignored,
                    except for the first element ("GO" symbol). This can be used for decoding,
                    but also for training to emulate http://arxiv.org/abs/1506.03099.
                    Signature -- loop_function(prev, i) = next
                        * prev is a 2D Tensor of shape [batch_size x output_size],
                        * i is an integer, the step number (when advanced control is needed),
                        * next is a 2D Tensor of shape [batch_size x input_size].
                scope: VariableScope for the created subgraph; defaults to "rnn_decoder".

            Returns:
                A tuple of the form (outputs, state), where:
                    outputs: A list of the same length as decoder_inputs of 2D Tensors with
                        shape [batch_size x output_size] containing generated outputs.
                    state: The state of each cell at the final time-step.
                        It is a 2D Tensor of shape [batch_size x cell.state_size].
                        (Note that in some cases, like basic RNN cell or GRU cell, outputs and
                        states can be the same. They are different for LSTM cells though.)
            """
            with variable_scope.variable_scope(scope or "rnn_decoder"):
                state = initial_state
                outputs = []
                prev = None
                for i, inp in enumerate(decoder_inputs):
                    if loop_function is not None and prev is not None:
                        with variable_scope.variable_scope("loop_function", reuse=True):
                            inp = loop_function(prev, i)
                    else:
                        inp = tf.matmul(inp, weights['out_dec_inp']) + biases['out_dec_inp']
                    if i > 0:
                        variable_scope.get_variable_scope().reuse_variables()
                    output, state = cell(inp, state)
                    outputs.append(output)
                    if loop_function is not None:
                        prev = output
            return outputs, state
        def _basic_rnn_seq2seq(encoder_inputs,
                               decoder_inputs,
                               cell,
                               feed_previous,
                               dtype=dtypes.float32,
                               scope=None):
            """Basic RNN sequence-to-sequence model.

            This model first runs an RNN to encode encoder_inputs into a state vector,
            then runs a decoder, initialized with the last encoder state, on decoder_inputs.
            Encoder and decoder use the same RNN cell type, but don't share parameters.

            Args:
                encoder_inputs: A list of 2D Tensors [batch_size x input_size].
                decoder_inputs: A list of 2D Tensors [batch_size x input_size].
                feed_previous: Boolean; if True, only the first of decoder_inputs will be
                    used (the "GO" symbol), and all other inputs will be generated from the
                    previous decoder output using _loop_function below. If False, decoder_inputs
                    are used as given (the standard decoder case).
                dtype: The dtype of the initial state of the RNN cell (default: tf.float32).
                scope: VariableScope for the created subgraph; default: "basic_rnn_seq2seq".

            Returns:
                A tuple of the form (outputs, state), where:
                    outputs: A list of the same length as decoder_inputs of 2D Tensors with
                        shape [batch_size x output_size] containing the generated outputs.
                    state: The state of each decoder cell in the final time-step.
                        It is a 2D Tensor of shape [batch_size x cell.state_size].
            """
            with variable_scope.variable_scope(scope or "basic_rnn_seq2seq"):
                enc_cell = copy.deepcopy(cell)
                _, enc_state = rnn.static_rnn(enc_cell, encoder_inputs, dtype=dtype)
                if feed_previous:
                    return _rnn_decoder(decoder_inputs, enc_state, cell, _loop_function)
                else:
                    return _rnn_decoder(decoder_inputs, enc_state, cell)
        def _loop_function(prev, i):
            '''Naive implementation of the loop function for _rnn_decoder. Transforms prev from
            dimension [batch_size x hidden_dim] to [batch_size x output_dim], which is then
            used as the decoder input of the next time step.'''
            #return tf.matmul(prev, weights['out']) + biases['out']
            temp_out = tf.matmul(prev, weights['out']) + biases['out']
            temp_concat = tf.concat([output_seq_extremes_bool[i], temp_out], 1)
            return tf.matmul(temp_concat, weights['out_dec_inp']) + biases['out_dec_inp']

        dec_outputs, dec_memory = _basic_rnn_seq2seq(
            enc_inp,
            dec_inp,
            cell,
            feed_previous=feed_previous
        )

        reshaped_outputs = [tf.matmul(i, weights['out']) + biases['out'] for i in dec_outputs]

    # Training loss and optimizer
    with tf.variable_scope('Loss'):
        nb = NB()
        nbinom_loss, param_theta = nb.loss, nb.theta

        # sum the per-step negative binomial loss over the output sequence
        output_loss = 0
        for _y, _Y in zip(reshaped_outputs, target_seq):
            output_loss += nbinom_loss(_Y, _y)

        # L2 regularization for weights and biases
        reg_loss = 0
        for tf_var in tf.trainable_variables():
            if 'Biases_' in tf_var.name or 'Weights_' in tf_var.name:
                reg_loss += tf.reduce_mean(tf.nn.l2_loss(tf_var))

        loss = output_loss + lambda_l2_reg * reg_loss

    with tf.variable_scope('Optimizer'):
        optimizer = tf.contrib.layers.optimize_loss(
            loss=loss,
            learning_rate=learning_rate,
            global_step=global_step,
            optimizer='Adam',
            clip_gradients=GRADIENT_CLIPPING)

    saver = tf.train.Saver

    return dict(
        enc_inp=enc_inp,
        target_seq=target_seq,
        train_op=optimizer,
        loss=loss,
        saver=saver,
        output_loss=output_loss,
        reshaped_outputs=reshaped_outputs,
        output_seq_extremes_bool=output_seq_extremes_bool,
    )
total_iteractions = 5000
batch_size = 10
KEEP_RATE = 0.5
train_losses = []
val_losses = []

rnn_model = build_graph(feed_previous=False)

saver = tf.train.Saver()

loss = []

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    print("Training losses: ")
    for i in range(total_iteractions):
        batch_input, batch_output, batch_in_event_bool, batch_out_event_bool = \
            generate_train_supervised_samples(i=i, batch_size=batch_size)

        feed_dict = {rnn_model['enc_inp'][t]: batch_input[:, t] for t in range(input_seq_len)}
        feed_dict.update({rnn_model['target_seq'][t]: batch_output[:, t] for t in range(output_seq_len)})
        feed_dict.update({rnn_model['output_seq_extremes_bool'][t]: batch_out_event_bool[:, t].reshape(-1, 1)
                          for t in range(output_seq_len)})
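
        # (my own debugging idea, not from either repository) to see where the NaN starts,
        # I could fetch the raw decoder outputs for this batch and check whether any
        # predicted mean is negative before it reaches the NB loss, e.g.:
        #   outs = sess.run(rnn_model['reshaped_outputs'], feed_dict)
        #   print(i, [float(o.min()) for o in outs])
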
        _, loss_t = sess.run([rnn_model['train_op'], rnn_model['loss']], feed_dict)
        print(i, "-", loss_t)
        loss.append(loss_t)

    temp_saver = rnn_model['saver']()
    save_path = temp_saver.save(sess, os.path.join('./', 'multivariate_ts_trial_case'))

print("Checkpoint saved at: ", save_path)