Мой второй графический процессор не используется; ниже представлены моя модель и связанный с ней обучающий код (Bi-LSTM, TensorFlow, Python).
Может ли кто-нибудь подсказать, где вносить изменения (tf.device, tf.variable_scope и так далее)?
Я сильно запутался при чтении примеров TensorFlow CIFAR-10 (multi-GPU).
Модель - Использование архитектуры BI-LSTM для прогнозирования классификации по нескольким меткам и нескольким классам:
import tensorflow as tf
def _variable_on_cpu(name, shape, initializer):
    """Create (or fetch via scope reuse) a variable pinned to host memory.

    Keeping variables on the CPU lets several GPU towers share one copy.
    """
    with tf.device('/cpu:0'):
        return tf.get_variable(name, shape, initializer=initializer)
def _variable_with_weight_decay(name, shape, initializer, wd):
    """Create a CPU-resident variable together with its L2 penalty term.

    Returns a (variable, weight_decay) pair.  When `wd` is None or zero the
    penalty is a constant 0.0 tensor, so callers can unconditionally append
    it to their loss list.
    """
    var = _variable_on_cpu(name, shape, initializer)
    if not wd:
        penalty = tf.constant(0.0, dtype=tf.float32)
    else:
        penalty = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    return var, penalty
def _auc_pr(true, prob, threshold):
    """Micro-averaged precision and recall at a fixed probability threshold.

    `true` is a multi-hot label tensor, `prob` the matching per-class
    probabilities; both precision and recall are scalar tensors.
    """
    pred_bool = tf.cast(
        tf.where(prob > threshold, tf.ones_like(prob), tf.zeros_like(prob)),
        tf.bool)
    true_bool = tf.cast(true, tf.bool)

    tp = tf.logical_and(pred_bool, true_bool)
    fp = tf.logical_and(pred_bool, tf.logical_not(true_bool))
    fn = tf.logical_and(tf.logical_not(pred_bool), true_bool)

    def _count(mask):
        # Number of True entries in a boolean tensor.
        return tf.reduce_sum(tf.cast(mask, tf.int32))

    precision = tf.truediv(_count(tp), _count(tf.logical_or(tp, fp)))
    recall = tf.truediv(_count(tp), _count(tf.logical_or(tp, fn)))
    return precision, recall
class Model(object):
    """Bi-directional LSTM classifier for multi-label / multi-class text.

    The graph embeds integer token ids, runs a forward and a backward LSTM
    over the sequence, concatenates the two final cell states, and maps them
    through one fully-connected layer to per-class logits.  The loss is
    sigmoid cross-entropy when `multi_label` is set, softmax cross-entropy
    otherwise.
    """

    def __init__(self, config, is_train=True):
        # Hidden units per LSTM direction (hard-coded, not read from config).
        self.num_uni = 100
        self.is_train = is_train
        self.emb_size = config['emb_size']
        # Probability cut-off used for precision/recall in the eval ops.
        self.thresholdVal = config['thresholdVal']
        self.batch_size = config['batch_size']
        # NOTE(review): num_kernel/min_window/max_window are stored but never
        # read below -- they look like leftovers from a CNN variant; confirm.
        self.num_kernel = config['num_kernel']
        self.min_window = config['min_window']
        self.max_window = config['max_window']
        self.vocab_size = config['vocab_size']
        self.num_classes = config['num_classes']
        self.sent_len = config['sent_len']
        self.l2_reg = config['l2_reg']
        # NOTE(review): stored but unused in this class.
        self.multi_instance = config['attention']
        # Chooses sigmoid (multi-label) vs. softmax (single-label) loss.
        self.multi_label = config['multi_label']
        if is_train:
            self.optimizer = config['optimizer']
            self.dropout = config['dropout']
        self.build_graph()

    def build_graph(self):
        """Build the computation graph: placeholders, Bi-LSTM encoder,
        fully-connected output, loss, eval ops and (optionally) the train op."""
        # Token-id matrix (batch, sent_len) and multi-hot labels
        # (batch, num_classes).
        self._inputs = tf.placeholder(dtype=tf.int64, shape=[None, self.sent_len], name='input_x')
        self._labels = tf.placeholder(dtype=tf.float32, shape=[None, self.num_classes], name='input_y')
        losses = []
        # lookup layer -- embedding table lives on the CPU (_variable_on_cpu)
        with tf.variable_scope('embedding') as scope:
            self._W_emb = _variable_on_cpu(name='embedding', shape=[self.vocab_size, self.emb_size],
                                           initializer=tf.random_uniform_initializer(minval=-1.0, maxval=1.0))
            # sent_batch: (batch, sent_len, emb_size).  The expand_dims that
            # would add a channel axis for conv2d is disabled below.
            sent_batch = tf.nn.embedding_lookup(params=self._W_emb, ids=self._inputs)
            #sent_batch = tf.expand_dims(sent_batch, -1)
        # Separate scopes keep the two directions' LSTM weights distinct.
        with tf.variable_scope('forward-lstm') as scope:
            frw_cell = tf.contrib.rnn.LSTMCell(num_units = self.num_uni)
            # NOTE(review): DropoutWrapper with default keep probabilities
            # (1.0) is currently a no-op.
            dropout_frw = tf.contrib.rnn.DropoutWrapper(frw_cell)
        with tf.variable_scope('backward-lstm') as scope:
            bcw_cell = tf.contrib.rnn.LSTMCell(num_units = self.num_uni)
            dropout_bcw = tf.contrib.rnn.DropoutWrapper(bcw_cell)
        with tf.variable_scope('encoder') as scope:
            # last_state: one LSTMStateTuple(c, h) per direction.
            rnn_output,last_state = tf.nn.bidirectional_dynamic_rnn(cell_fw = dropout_frw,
                                                                    cell_bw = dropout_bcw,
                                                                    inputs = sent_batch,
                                                                    #sequence_length = self.sent_len,
                                                                    dtype=tf.float32)
        # Make use of output of last_state: concatenate the final cell states
        # of both directions -> (batch, 2*num_uni).
        pool_flat = tf.concat([last_state[0].c,last_state[1].c],axis=-1)
        # drop out layer (training only; keep_prob = 1 - dropout)
        if self.is_train and self.dropout > 0:
            pool_dropout = tf.nn.dropout(pool_flat, 1 - self.dropout)
        else:
            pool_dropout = pool_flat
        # fully-connected layer with L2 weight decay on W
        with tf.variable_scope('output') as scope:
            W, wd = _variable_with_weight_decay('W', shape=[2*self.num_uni, self.num_classes],
                                                initializer=tf.truncated_normal_initializer(stddev=0.05),
                                                wd=self.l2_reg)
            losses.append(wd)
            biases = _variable_on_cpu('bias', shape=[self.num_classes],
                                      initializer=tf.constant_initializer(0.01))
            self.logits = tf.nn.bias_add(tf.matmul(pool_dropout, W), biases, name='logits')
        # loss: per-example cross entropy + the L2 penalty collected above
        with tf.variable_scope('loss') as scope:
            if self.multi_label:
                cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits, labels=self._labels,
                                                                        name='cross_entropy_per_example')
            else:
                cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self._labels,
                                                                        name='cross_entropy_per_example')
            cross_entropy_loss = tf.reduce_mean(cross_entropy, name='cross_entropy_loss')
            losses.append(cross_entropy_loss)
            self._total_loss = tf.add_n(losses, name='total_loss')
        # eval with precision-recall at the configured threshold
        with tf.variable_scope('evaluation') as scope:
            precision = []
            recall = []
            pre, rec = _auc_pr(self._labels, tf.sigmoid(self.logits), self.thresholdVal)
            precision.append(pre)
            recall.append(rec)
            self._pre_op = precision
            self._rec_op = recall
        # train on a batch; _lr is set externally through assign_lr()
        self._lr = tf.Variable(0.0, trainable=False)
        if self.is_train:
            if self.optimizer == 'adadelta':
                opt = tf.train.AdadeltaOptimizer(self._lr)
            elif self.optimizer == 'adagrad':
                opt = tf.train.AdagradOptimizer(self._lr)
            elif self.optimizer == 'adam':
                opt = tf.train.AdamOptimizer(self._lr)
            elif self.optimizer == 'sgd':
                opt = tf.train.GradientDescentOptimizer(self._lr)
            else:
                raise ValueError("Optimizer not supported.")
            grads = opt.compute_gradients(self._total_loss)
            self._train_op = opt.apply_gradients(grads)
            for var in tf.trainable_variables():
                tf.summary.histogram(var.op.name, var)
        else:
            self._train_op = tf.no_op()
        return

    # --- read-only accessors for graph endpoints --------------------------
    @property
    def inputs(self):
        return self._inputs
    @property
    def labels(self):
        return self._labels
    @property
    def lr(self):
        return self._lr
    @property
    def train_op(self):
        return self._train_op
    @property
    def total_loss(self):
        return self._total_loss
    @property
    def eval_op(self):
        # NOTE(review): self._eval_op is never assigned in build_graph, so
        # accessing this property raises AttributeError.
        return self._eval_op
    @property
    def pre_op(self):
        return self._pre_op
    @property
    def rec_op(self):
        return self._rec_op
    @property
    def scores(self):
        # Per-class sigmoid scores for multi-label prediction.
        return tf.sigmoid(self.logits)
    @property
    def W_emb(self):
        return self._W_emb

    def assign_lr(self, session, lr_value):
        """Set the learning-rate variable to `lr_value` in `session`."""
        session.run(tf.assign(self.lr, lr_value))

    def assign_embedding(self, session, pretrained):
        """Overwrite the embedding table with a pretrained matrix."""
        session.run(tf.assign(self.W_emb, pretrained))
Обучение:
import numpy as np
from datetime import datetime
import tensorflow as tf
import pickle
import lstmModel as lstm
import time
import os
def frameDataBasedOnKey(config,keys,inputIdWithKey, labelIdWithKey):
    """Build dense (input, label) matrices for a batch of example keys.

    Args:
        config: dict with 'num_classes', 'sent_len', 'minlblIndex',
            'maxlblIndex'.
        keys: sequence of example identifiers; order defines the row order.
        inputIdWithKey: mapping key -> sequence of token ids.
        labelIdWithKey: mapping key -> sequence of raw label ids.

    Returns:
        (inputs, labels): inputs is a (len(keys), sent_len) int array,
        zero-padded and truncated to sent_len; labels is a
        (len(keys), num_classes) multi-hot int array where raw ids are
        shifted by -minlblIndex and ids outside [minlblIndex, maxlblIndex)
        are dropped.
    """
    num_classes = config['num_classes']
    sent_len = config['sent_len']
    min_lbl = config['minlblIndex']
    max_lbl = config['maxlblIndex']
    batch_len = len(keys)

    inputs = np.zeros([batch_len, sent_len], dtype=int)
    for row, key in enumerate(keys):
        # Missing keys yield an empty (all-zero) row instead of crashing on
        # the None returned by dict.get.
        token_ids = inputIdWithKey.get(key) or []
        # Truncate to sent_len: the original wrote past the buffer and
        # raised IndexError on over-long sequences.
        length = min(len(token_ids), sent_len)
        inputs[row, :length] = token_ids[:length]

    labels = np.zeros([batch_len, num_classes], dtype=int)
    for row, key in enumerate(keys):
        for raw in labelIdWithKey.get(key) or []:
            if min_lbl <= raw < max_lbl:
                # Shift raw label ids so the smallest valid id maps to col 0.
                labels[row][raw - min_lbl] = 1
    return (inputs, labels)
def batch_iter(inputIdWithKey,labelIdWithKey,keys,config, isTest = False):
    """Yield (input, label) mini-batches built from `keys`.

    Makes a single pass over the data when `isTest`, otherwise
    config['num_epochs'] passes; the last batch of each epoch may be
    smaller than config['batch_size'].
    """
    batch_size = config['batch_size']
    data_size = len(keys)
    batches_per_epoch = int(np.ceil(float(data_size) / batch_size))
    epochs = 1 if isTest else config['num_epochs']
    for _ in range(epochs):
        for batch_idx in range(batches_per_epoch):
            lo = batch_idx * batch_size
            hi = min(lo + batch_size, data_size)
            yield frameDataBasedOnKey(config, keys[lo:hi], inputIdWithKey, labelIdWithKey)
def offset(array, pre, post):
    """Return a copy of `array` with `pre` prepended and `post` appended."""
    padded = np.insert(np.array(array), 0, pre)
    return np.append(padded, post)
def calc_auc_pr(precision, recall):
    """Approximate the area under the precision-recall curve.

    The measured (recall, precision) points are anchored at (0, 1) and
    (1, 0) before trapezoidal integration, so even a single measurement
    yields a finite area.

    Note: the original call passed dx=5 as well, but NumPy ignores dx
    whenever explicit x coordinates are supplied, so it is dropped here.
    """
    assert len(precision) == len(recall)
    pre = np.concatenate(([1.0], np.asarray(precision, dtype=float), [0.0]))
    rec = np.concatenate(([0.0], np.asarray(recall, dtype=float), [1.0]))
    # np.trapz was deprecated/renamed to np.trapezoid in NumPy >= 2.0.
    integrate = getattr(np, 'trapezoid', None) or np.trapz
    return integrate(pre, x=rec)
def _summary_for_scalar(name, value):
    """Wrap one Python number as a tf.Summary proto for a FileWriter."""
    scalar = tf.Summary.Value(tag=name, simple_value=float(value))
    return tf.Summary(value=[scalar])
def train(inputIdWithKey,labelIdWithKey,train_key, test_key,config,embeddingWordMatrix):
    """Train the Bi-LSTM model on `train_key` and evaluate on `test_key`.

    Builds two weight-sharing Model instances (train + eval) in one graph,
    then loops over mini-batches: runs the train op, logs precision/recall
    and AUC-PR, writes TensorBoard summaries, decays the learning rate when
    the loss plateaus, and periodically checkpoints the session.
    """
    num_batches_per_epoch = int(np.ceil(float(len(train_key))/config['batch_size']))
    max_steps = num_batches_per_epoch * config['num_epochs']
    with tf.Graph().as_default():
        # Two models in the same 'lstm' variable scope: reuse=True makes the
        # eval model share every variable with the training model.
        with tf.variable_scope('lstm', reuse=None):
            m = lstm.Model(config, is_train=True)
        with tf.variable_scope('lstm', reuse=True):
            mtest = lstm.Model(config, is_train=False)
        saver = tf.train.Saver(tf.global_variables())
        save_path = os.path.join(config['ModelOutput'].format(config['minlblIndex'],config['maxlblIndex']), 'model.ckpt')
        summary_op = tf.summary.merge_all()
        # session (log_device_placement prints which device each op runs on,
        # useful when debugging GPU utilisation)
        #sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
        #with tf.Session().as_default() as sess:
        with tf.Session(config=tf.ConfigProto(log_device_placement=True)).as_default() as sess:
            proj_config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
            '''embedding = proj_config.embeddings.add()
            embedding.tensor_name = m.W_emb.name
            embedding.metadata_path = os.path.join(FLAGS.data_dir, 'vocab.txt')'''
            train_summary_writer = tf.summary.FileWriter(os.path.join(config['ModelOutput'].format(config['minlblIndex'],config['maxlblIndex']), "train"), graph=sess.graph)
            dev_summary_writer = tf.summary.FileWriter(os.path.join(config['ModelOutput'].format(config['minlblIndex'],config['maxlblIndex']), "dev"), graph=sess.graph)
            tf.contrib.tensorboard.plugins.projector.visualize_embeddings(train_summary_writer, proj_config)
            tf.contrib.tensorboard.plugins.projector.visualize_embeddings(dev_summary_writer, proj_config)
            sess.run(tf.global_variables_initializer())
            # assign pretrained embeddings (overwrites the random init)
            if config['use_pretrain']:
                pretrained_embedding = np.array(embeddingWordMatrix.get('Embedding_matrix'))
                m.assign_embedding(sess, pretrained_embedding)
            # initialize parameters of the lr-decay bookkeeping
            current_lr = config['init_lr']
            lowest_loss_value = float("inf")
            decay_step_counter = 0
            global_step = 0
            # evaluate on dev set: mean loss / AUC-PR / F1 over a single pass
            def dev_step(mtest, sess):
                dev_loss = []
                dev_auc = []
                dev_f1_score = []
                # create batch iterator (one epoch only: isTest=True)
                test_batches = batch_iter(inputIdWithKey,labelIdWithKey,test_key, config, isTest = True)
                for x_batch,y_batch in test_batches:
                    #loss_value, eval_value = sess.run([mtest.total_loss, mtest.eval_op],
                    loss_value, pre, rec = sess.run([mtest.total_loss, mtest.pre_op, mtest.rec_op],
                                                    feed_dict={mtest.inputs: np.array(x_batch), mtest.labels: np.array(y_batch)})
                    dev_loss.append(loss_value)
                    dev_auc.append(calc_auc_pr(pre, rec))
                    fOne = 0.0
                    # pre/rec are single-element lists (one threshold).
                    if pre[0] != 0.0 and rec[0] != 0.0:
                        fOne = (2.0 * pre[0] * rec[0]) / (pre[0] + rec[0])
                    dev_f1_score.append(fOne) # threshold = 0.5
                return np.mean(dev_loss), np.mean(dev_auc), np.mean(dev_f1_score)
            # train loop
            train_loss = []
            train_auc = []
            train_f1_score = []
            train_batches = batch_iter(inputIdWithKey,labelIdWithKey,train_key, config)
            for x_batch,y_batch in train_batches:
                batch_size = len(x_batch)
                # Push the (possibly decayed) learning rate into the graph.
                m.assign_lr(sess, current_lr)
                global_step += 1
                feed = {m.inputs: np.array(x_batch), m.labels: np.array(y_batch)}
                start_time = time.time()
                #_, loss_value, eval_value = sess.run([m.train_op, m.total_loss, m.eval_op], feed_dict=feed)
                _, loss_value, pre, rec = sess.run([m.train_op, m.total_loss, m.pre_op, m.rec_op], feed_dict=feed)
                proc_duration = time.time() - start_time
                train_loss.append(loss_value)
                #pre, rec = zip(*eval_value)
                #print("Pre: ", pre," Rec: ", rec)
                auc = calc_auc_pr(pre, rec)
                #f1 = (2.0 * pre[5] * rec[5]) / (pre[5] + rec[5]) # threshold = 0.5
                f1 = 0.0
                if pre[0] != 0.0 and rec[0] != 0:
                    f1 = (2.0 * pre[0] * rec[0]) / (pre[0] + rec[0])
                train_auc.append(auc)
                train_f1_score.append(f1)
                assert not np.isnan(loss_value), "Model loss is NaN."
                # print log every log_step batches
                if global_step % config['log_step'] == 0:
                    examples_per_sec = batch_size / proc_duration
                    format_str = '%s: step %d/%d, f1 = %.4f, auc = %.4f, loss = %.4f, pre = %.4f, rec = %.4f ' + \
                                 '(%.1f examples/sec; %.3f sec/batch), lr: %.6f'
                    print(format_str % (datetime.now(), global_step, max_steps, f1, auc, loss_value,pre[0], rec[0],
                                        examples_per_sec, proc_duration, current_lr))
                # write summary every summary_step batches
                if global_step % config['summary_step'] == 0:
                    summary_str = sess.run(summary_op)
                    train_summary_writer.add_summary(summary_str, global_step)
                    dev_summary_writer.add_summary(summary_str, global_step)
                    # summary loss, f1 (means over the interval since last reset)
                    train_summary_writer.add_summary(
                        _summary_for_scalar('loss', np.mean(train_loss)), global_step=global_step)
                    train_summary_writer.add_summary(
                        _summary_for_scalar('auc', np.mean(train_auc)), global_step=global_step)
                    train_summary_writer.add_summary(
                        _summary_for_scalar('f1', np.mean(train_f1_score)), global_step=global_step)
                    dev_loss, dev_auc, dev_f1 = dev_step(mtest, sess)
                    dev_summary_writer.add_summary(
                        _summary_for_scalar('loss', dev_loss), global_step=global_step)
                    dev_summary_writer.add_summary(
                        _summary_for_scalar('auc', dev_auc), global_step=global_step)
                    dev_summary_writer.add_summary(
                        _summary_for_scalar('f1', dev_f1), global_step=global_step)
                    # NOTE(review): dev_step runs a second full eval pass here
                    # just to feed the prints -- consider reusing the result
                    # from the call above.
                    dev_loss, dev_auc, dev_f1 = dev_step(mtest, sess)
                    print("\n===== write summary =====")
                    print("%s: step %d/%d: train_loss = %.6f, train_auc = %.4f, train_f1 = %.4f" \
                          % (datetime.now(), global_step, max_steps,
                             np.mean(train_loss), np.mean(train_auc), np.mean(train_f1_score)))
                    print("%s: step %d/%d: dev_loss = %.6f, dev_auc = %.4f, dev_f1 = %.4f\n" \
                          % (datetime.now(), global_step, max_steps, dev_loss, dev_auc, dev_f1))
                    # reset container for the next interval
                    train_loss = []
                    train_auc = []
                    train_f1_score = []
                # decay learning rate if necessary: shrink lr after
                # tolerance_step consecutive batches without a new best loss
                if loss_value < lowest_loss_value:
                    lowest_loss_value = loss_value
                    decay_step_counter = 0
                else:
                    decay_step_counter += 1
                    if decay_step_counter >= config['tolerance_step']:
                        current_lr *= config['lr_decay']
                        print('%s: step %d/%d, Learning rate decays to %.5f' % \
                              (datetime.now(), global_step, max_steps, current_lr))
                        decay_step_counter = 0
                # stop learning if learning rate is too low
                if current_lr < 1e-5:
                    break
                if global_step % config['checkpoint_step'] == 0:
                    saver.save(sess, save_path, global_step=global_step)
            # final checkpoint after the loop ends (or breaks)
            saver.save(sess, save_path, global_step=global_step)
# Hyper-parameters and pipeline settings for model construction and training.
config = {'batch_size':128,'num_epochs' : 2,'emb_size':300,'num_kernel':100,'min_window': 3,
          'max_window':5,'vocab_size':0,'num_classes':150,
          'sent_len':600,'l2_reg': 1e-4,'attention': False,'multi_label': True, 'optimizer':'adam','dropout':0.5,
          'use_pretrain': True, 'init_lr' : 1e-3,'summary_step': 150,'lr_decay' : 0.95,'checkpoint_step':8000,
          'tolerance_step':500,'log_step':25, 'thresholdVal': 0.5, 'ModelOutput':'ModelOutput_{0}-{1}',
          'minlblIndex':0,'maxlblIndex':150}
# Load the pickled embedding matrix and the input/label lookups while pinned
# to the CPU.
with tf.device('/cpu:0'):
    with open('shortListedEmbeddingMatrix_Full.pk', 'rb') as fin:
        embeddingWordMatrix = pickle.load(fin)
    with open('InputEmbIdWithKey.pk', 'rb') as fin:
        inputIdWithKey = pickle.load(fin)
    with open('ICDLabelFull_Id_WithKey.pk', 'rb') as fin:
        labelIdWithKey = pickle.load(fin)
# Only keys that have both an input sequence and labels are usable.
inputKey = set(inputIdWithKey.keys())
labelKey = set(labelIdWithKey.keys())
inputData = list(inputKey.intersection(labelKey))
np.random.shuffle(inputData) # Shuffle the training data
# 99% / 1% train/test split on the shuffled keys.
train_size = int(len(inputData) * 0.99)
train_key = inputData[:train_size]
test_key = inputData[train_size:]
config['vocab_size'] = len(embeddingWordMatrix.get('word_vocab'))
print('Training in progress ...')
train(inputIdWithKey,labelIdWithKey,train_key, test_key, config, embeddingWordMatrix)