Второй графический процессор не используется - TensorFlow GPU с Python Bi-LSTM - Мультиклассовая классификация по нескольким меткам - PullRequest
0 голосов
/ 13 ноября 2018

Мой второй графический процессор не используется; ниже представлены моя модель и связанный с ней обучающий код (Bi-LSTM на TensorFlow и Python).

Может кто-нибудь подсказать мне, где вносить изменения (tf.device, tf.variable_scope и так далее)?

Я сильно запутался при чтении примера tensorflow cifar10.

Модель - Использование архитектуры BI-LSTM для прогнозирования классификации по нескольким меткам и нескольким классам:

import tensorflow as tf   
def _variable_on_cpu(name, shape, initializer):
    """Create (or fetch, under scope reuse) a variable pinned to host memory."""
    with tf.device('/cpu:0'):
        cpu_var = tf.get_variable(name, shape, initializer=initializer)
    return cpu_var

def _variable_with_weight_decay(name, shape, initializer, wd):
    """Create a CPU-resident variable plus its L2 weight-decay loss term.

    Returns (variable, penalty); the penalty is a constant 0.0 tensor when
    `wd` is None or zero, otherwise wd * l2_loss(variable).
    """
    var = _variable_on_cpu(name, shape, initializer)
    if wd is None or wd == 0.:
        penalty = tf.constant(0.0, dtype=tf.float32)
    else:
        penalty = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    return var, penalty


def _auc_pr(true, prob, threshold):
    """Build precision and recall tensors for binary decisions at `threshold`.

    `prob` is thresholded into 0/1 predictions, compared against `true`, and
    precision / recall are computed as float ratios of the element counts.
    """
    predicted = tf.cast(tf.where(prob > threshold, tf.ones_like(prob), tf.zeros_like(prob)), tf.bool)
    actual = tf.cast(true, tf.bool)

    true_pos = tf.logical_and(predicted, actual)
    false_pos = tf.logical_and(predicted, tf.logical_not(actual))
    false_neg = tf.logical_and(tf.logical_not(predicted), actual)

    def _count(mask):
        # number of True elements in a boolean tensor
        return tf.reduce_sum(tf.cast(mask, tf.int32))

    precision = tf.truediv(_count(true_pos), _count(tf.logical_or(true_pos, false_pos)))
    recall = tf.truediv(_count(true_pos), _count(tf.logical_or(true_pos, false_neg)))
    return precision, recall


class Model(object):
    """Bi-LSTM text classifier for multi-class / multi-label prediction (TF1 graph mode).

    Pipeline: embedding lookup -> bidirectional dynamic LSTM -> concatenation
    of the two final cell states -> dropout -> fully-connected logits.  Loss
    is sigmoid cross-entropy (multi-label) or softmax cross-entropy
    (single-label) plus L2 weight decay on the output weights.

    NOTE(review): apart from pinning variables to /cpu:0, no tf.device
    placement is requested anywhere in this graph, so TensorFlow places all
    ops on its default device — a likely reason a second GPU stays idle.
    """

    def __init__(self, config, is_train=True):
        """Read hyper-parameters from the `config` dict and build the graph.

        When `is_train` is False, dropout and the optimizer are skipped and
        `train_op` becomes a no-op.
        """
        self.num_uni = 100  # hidden units per LSTM direction
        self.is_train = is_train
        self.emb_size = config['emb_size']
        self.thresholdVal = config['thresholdVal']  # decision threshold for precision/recall
        self.batch_size = config['batch_size']
        # num_kernel / min_window / max_window are stored but never read by
        # build_graph (apparent leftovers from a CNN variant of this model).
        self.num_kernel = config['num_kernel']
        self.min_window = config['min_window']
        self.max_window = config['max_window']
        self.vocab_size = config['vocab_size']
        self.num_classes = config['num_classes']
        self.sent_len = config['sent_len']
        self.l2_reg = config['l2_reg']
        self.multi_instance = config['attention']  # stored but unused in build_graph
        self.multi_label = config['multi_label']
        if is_train:
            self.optimizer = config['optimizer']
            self.dropout = config['dropout']
        self.build_graph()

    def build_graph(self):
        """Build the computation graph: placeholders, Bi-LSTM encoder, loss,
        precision/recall eval tensors and (for training) the optimizer step."""
        self._inputs = tf.placeholder(dtype=tf.int64, shape=[None, self.sent_len], name='input_x')
        self._labels = tf.placeholder(dtype=tf.float32, shape=[None, self.num_classes], name='input_y')
        losses = []

        # lookup layer: token ids -> dense word vectors (table lives on the CPU)
        with tf.variable_scope('embedding') as scope:
            self._W_emb = _variable_on_cpu(name='embedding', shape=[self.vocab_size, self.emb_size],
                                           initializer=tf.random_uniform_initializer(minval=-1.0, maxval=1.0))
            # sent_batch shape: (batch_size, sent_len, emb_size); the conv2d
            # expand_dims step below is disabled for this RNN model.
            sent_batch = tf.nn.embedding_lookup(params=self._W_emb, ids=self._inputs)
            #sent_batch = tf.expand_dims(sent_batch, -1)

        # Separate scopes so the forward and backward cells get distinct variables.
        with tf.variable_scope('forward-lstm') as scope:
            frw_cell = tf.contrib.rnn.LSTMCell(num_units = self.num_uni)
            # NOTE(review): DropoutWrapper is constructed with no keep_prob
            # arguments — with TF's defaults (1.0) it is a no-op; confirm.
            dropout_frw = tf.contrib.rnn.DropoutWrapper(frw_cell)

        with tf.variable_scope('backward-lstm') as scope:
            bcw_cell = tf.contrib.rnn.LSTMCell(num_units = self.num_uni)
            dropout_bcw = tf.contrib.rnn.DropoutWrapper(bcw_cell)

        with tf.variable_scope('encoder') as scope:
            # sequence_length is not fed, so every sequence is processed to
            # its full padded length sent_len.
            rnn_output,last_state = tf.nn.bidirectional_dynamic_rnn(cell_fw = dropout_frw,
                                                                    cell_bw = dropout_bcw,
                                                                    inputs = sent_batch,
                                                                    #sequence_length = self.sent_len,
                                                                    dtype=tf.float32)

        # Concatenate the final cell states (.c) of the forward and backward
        # passes into a single (batch, 2 * num_uni) feature vector.
        pool_flat = tf.concat([last_state[0].c,last_state[1].c],axis=-1)

        # drop out layer (training only)
        if self.is_train and self.dropout > 0:
            # TF1 tf.nn.dropout's second positional arg is keep_prob, so
            # self.dropout is interpreted here as the DROP probability.
            pool_dropout = tf.nn.dropout(pool_flat, 1 - self.dropout)
        else:
            pool_dropout = pool_flat

        # fully-connected layer: logits = pool_dropout @ W + bias
        with tf.variable_scope('output') as scope:
            W, wd = _variable_with_weight_decay('W', shape=[2*self.num_uni, self.num_classes],
                                                initializer=tf.truncated_normal_initializer(stddev=0.05),
                                                wd=self.l2_reg)
            losses.append(wd)  # L2 penalty joins the total loss below
            biases = _variable_on_cpu('bias', shape=[self.num_classes],
                                      initializer=tf.constant_initializer(0.01))
            self.logits = tf.nn.bias_add(tf.matmul(pool_dropout, W), biases, name='logits')

        # loss
        with tf.variable_scope('loss') as scope:
            if self.multi_label:
                # independent sigmoid per class: a row may contain several 1s
                cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits, labels=self._labels,
                                                                        name='cross_entropy_per_example')
            else:
                cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self._labels,
                                                                        name='cross_entropy_per_example')

            cross_entropy_loss = tf.reduce_mean(cross_entropy, name='cross_entropy_loss')

            losses.append(cross_entropy_loss)
            self._total_loss = tf.add_n(losses, name='total_loss')

        # eval with precision-recall at the single threshold self.thresholdVal
        with tf.variable_scope('evaluation') as scope:
            precision = []
            recall = []
            pre, rec = _auc_pr(self._labels, tf.sigmoid(self.logits), self.thresholdVal)
            precision.append(pre)
            recall.append(rec)
            # one-element lists; callers read pre_op[0] / rec_op[0]
            self._pre_op = precision
            self._rec_op = recall

        # Learning rate lives in a non-trainable variable so it can be
        # reassigned between steps via assign_lr().
        self._lr = tf.Variable(0.0, trainable=False)
        if self.is_train:
            if self.optimizer == 'adadelta':
                opt = tf.train.AdadeltaOptimizer(self._lr)
            elif self.optimizer == 'adagrad':
                opt = tf.train.AdagradOptimizer(self._lr)
            elif self.optimizer == 'adam':
                opt = tf.train.AdamOptimizer(self._lr)
            elif self.optimizer == 'sgd':
                opt = tf.train.GradientDescentOptimizer(self._lr)
            else:
                raise ValueError("Optimizer not supported.")
            grads = opt.compute_gradients(self._total_loss)
            self._train_op = opt.apply_gradients(grads)

            # histogram summaries for every trainable variable
            for var in tf.trainable_variables():
                tf.summary.histogram(var.op.name, var)
        else:
            self._train_op = tf.no_op()

        return

    @property
    def inputs(self):
        """Placeholder for token-id input, shape (batch, sent_len)."""
        return self._inputs

    @property
    def labels(self):
        """Placeholder for multi-hot labels, shape (batch, num_classes)."""
        return self._labels

    @property
    def lr(self):
        """Learning-rate variable (set via assign_lr)."""
        return self._lr

    @property
    def train_op(self):
        """Gradient-application op (tf.no_op() when is_train is False)."""
        return self._train_op

    @property
    def total_loss(self):
        """Cross-entropy plus L2 weight-decay loss."""
        return self._total_loss

    @property
    def eval_op(self):
        # NOTE(review): self._eval_op is never assigned in build_graph, so
        # reading this property raises AttributeError; use pre_op / rec_op.
        return self._eval_op

    @property
    def pre_op(self):
        """One-element list holding the precision tensor."""
        return self._pre_op

    @property
    def rec_op(self):
        """One-element list holding the recall tensor."""
        return self._rec_op

    @property
    def scores(self):
        """Per-class sigmoid probabilities derived from the logits."""
        return tf.sigmoid(self.logits)

    @property
    def W_emb(self):
        """Embedding matrix variable, shape (vocab_size, emb_size)."""
        return self._W_emb

    def assign_lr(self, session, lr_value):
        """Set the learning rate used by the optimizer."""
        session.run(tf.assign(self.lr, lr_value))

    def assign_embedding(self, session, pretrained):
        """Overwrite the embedding matrix with a pretrained array."""
        session.run(tf.assign(self.W_emb, pretrained))

Обучение:

import numpy as np
from datetime import datetime
import tensorflow as tf
import pickle
import lstmModel as lstm
import time
import os

def frameDataBasedOnKey(config, keys, inputIdWithKey, labelIdWithKey):
    """Assemble dense input and label matrices for the examples named by `keys`.

    Args:
        config: dict providing 'num_classes', 'sent_len', 'minlblIndex',
            'maxlblIndex'.
        keys: ordered iterable of example keys.
        inputIdWithKey: key -> sequence of token ids.
        labelIdWithKey: key -> sequence of raw label ids.

    Returns:
        (inputs, labels): inputs is a (len(keys), sent_len) int array of
        token ids, right-padded with zeros; labels is a
        (len(keys), num_classes) multi-hot int array containing only labels
        in [minlblIndex, maxlblIndex), shifted so minlblIndex maps to column 0.
    """
    num_classes = config['num_classes']
    sent_len = config['sent_len']
    n = len(keys)

    inputs = np.zeros([n, sent_len], dtype=int)
    for row, key in enumerate(keys):
        token_ids = inputIdWithKey.get(key) or []
        # Truncate to sent_len: the original loop raised IndexError on any
        # document longer than sent_len tokens.
        used = min(len(token_ids), sent_len)
        inputs[row, :used] = token_ids[:sent_len]

    labels = np.zeros([n, num_classes], dtype=int)
    lo = config['minlblIndex']
    hi = config['maxlblIndex']
    for row, key in enumerate(keys):
        for lbl in labelIdWithKey.get(key) or []:
            if lo <= lbl < hi:
                labels[row, lbl - lo] = 1  # shift so lo maps to column 0
    return (inputs, labels)

def batch_iter(inputIdWithKey, labelIdWithKey, keys, config, isTest = False):
    """Yield (inputs, labels) minibatches over `keys`.

    Iterates config['num_epochs'] times over the data (exactly once when
    `isTest` is True); the final batch of each epoch may be smaller than
    config['batch_size'].
    """
    total = len(keys)
    batch = config['batch_size']
    batches_per_epoch = int(np.ceil(float(total) / batch))
    epochs = 1 if isTest else config['num_epochs']
    for _ in range(epochs):
        for b in range(batches_per_epoch):
            lo = b * batch
            hi = min(lo + batch, total)
            yield frameDataBasedOnKey(config, keys[lo:hi], inputIdWithKey, labelIdWithKey)

def offset(array, pre, post):
    """Return a copy of `array` with `pre` prepended and `post` appended."""
    ret = np.array(array)
    ret = np.insert(ret, 0, pre)
    ret = np.append(ret, post)
    return ret


def calc_auc_pr(precision, recall):
    """Approximate the area under the precision-recall curve (trapezoid rule).

    The curve is anchored at (recall=0, precision=1) and (recall=1,
    precision=0) before integration.
    """
    assert len(precision) == len(recall)
    # np.trapz was removed in NumPy 2.0 (renamed np.trapezoid); support both.
    # The original also passed dx=5, which NumPy silently ignores whenever
    # explicit x coordinates are supplied — dropped here as a dead argument.
    trapezoid = getattr(np, 'trapezoid', None) or np.trapz
    return trapezoid(offset(precision, 1, 0), x=offset(recall, 0, 1))

def _summary_for_scalar(name, value):
    """Wrap a single scalar into a tf.Summary protobuf for FileWriter."""
    scalar = tf.Summary.Value(tag=name, simple_value=float(value))
    return tf.Summary(value=[scalar])

def train(inputIdWithKey,labelIdWithKey,train_key, test_key,config,embeddingWordMatrix):
    """Train the Bi-LSTM model and periodically evaluate on the dev keys.

    Builds a training model and a weight-sharing evaluation model inside the
    same 'lstm' variable scope, then runs the minibatch loop with periodic
    logging, summary writing, dev evaluation, learning-rate decay and
    checkpointing.

    NOTE(review): as in the model graph, no tf.device('/gpu:N') placement is
    requested here, so all ops go to TensorFlow's default device.
    """
    num_batches_per_epoch = int(np.ceil(float(len(train_key))/config['batch_size']))
    max_steps = num_batches_per_epoch * config['num_epochs']

    with tf.Graph().as_default():
        # reuse=True makes mtest share every variable created by m.
        with tf.variable_scope('lstm', reuse=None):
            m = lstm.Model(config, is_train=True)
        with tf.variable_scope('lstm', reuse=True):
            mtest = lstm.Model(config, is_train=False)

        saver = tf.train.Saver(tf.global_variables())
        save_path = os.path.join(config['ModelOutput'].format(config['minlblIndex'],config['maxlblIndex']), 'model.ckpt')
        summary_op = tf.summary.merge_all()

        # session
        #sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
        #with tf.Session().as_default() as sess:
        # log_device_placement=True prints each op's device assignment to
        # stdout — useful for checking which GPU (if any) runs the graph.
        with tf.Session(config=tf.ConfigProto(log_device_placement=True)).as_default() as sess:

            proj_config = tf.contrib.tensorboard.plugins.projector.ProjectorConfig()
            '''embedding = proj_config.embeddings.add()
            embedding.tensor_name = m.W_emb.name
            embedding.metadata_path = os.path.join(FLAGS.data_dir, 'vocab.txt')'''

            # Separate writers so TensorBoard shows train and dev curves side by side.
            train_summary_writer = tf.summary.FileWriter(os.path.join(config['ModelOutput'].format(config['minlblIndex'],config['maxlblIndex']), "train"), graph=sess.graph)
            dev_summary_writer = tf.summary.FileWriter(os.path.join(config['ModelOutput'].format(config['minlblIndex'],config['maxlblIndex']), "dev"), graph=sess.graph)
            tf.contrib.tensorboard.plugins.projector.visualize_embeddings(train_summary_writer, proj_config)
            tf.contrib.tensorboard.plugins.projector.visualize_embeddings(dev_summary_writer, proj_config)

            sess.run(tf.global_variables_initializer())

            # assign pretrained embeddings (overwrites the random init)
            if config['use_pretrain']:
                pretrained_embedding = np.array(embeddingWordMatrix.get('Embedding_matrix'))
                m.assign_embedding(sess, pretrained_embedding)

            # initialize loop bookkeeping
            current_lr = config['init_lr']
            lowest_loss_value = float("inf")
            decay_step_counter = 0
            global_step = 0

            # Run one full pass over the dev set; returns mean loss, mean
            # PR-AUC and mean F1 across dev batches.
            def dev_step(mtest, sess):
                dev_loss = []
                dev_auc = []
                dev_f1_score = []

                # create batch
                test_batches = batch_iter(inputIdWithKey,labelIdWithKey,test_key, config, isTest = True)
                for x_batch,y_batch in test_batches:
                    #loss_value, eval_value = sess.run([mtest.total_loss, mtest.eval_op],
                    loss_value, pre, rec = sess.run([mtest.total_loss, mtest.pre_op, mtest.rec_op],
                        feed_dict={mtest.inputs: np.array(x_batch), mtest.labels: np.array(y_batch)})
                    dev_loss.append(loss_value)
                    dev_auc.append(calc_auc_pr(pre, rec))
                    # pre/rec are one-element lists (single threshold)
                    fOne = 0.0
                    if pre[0] != 0.0 and rec[0] != 0.0:
                        fOne = (2.0 * pre[0] * rec[0]) / (pre[0] + rec[0])
                    dev_f1_score.append(fOne) # threshold = 0.5

                return np.mean(dev_loss), np.mean(dev_auc), np.mean(dev_f1_score)

            # train loop
            train_loss = []
            train_auc = []
            train_f1_score = []
            train_batches = batch_iter(inputIdWithKey,labelIdWithKey,train_key, config)
            for x_batch,y_batch in train_batches:
                batch_size = len(x_batch)

                m.assign_lr(sess, current_lr)
                global_step += 1

                feed = {m.inputs: np.array(x_batch), m.labels: np.array(y_batch)}
                start_time = time.time()
                #_, loss_value, eval_value = sess.run([m.train_op, m.total_loss, m.eval_op], feed_dict=feed)
                _, loss_value, pre, rec = sess.run([m.train_op, m.total_loss, m.pre_op, m.rec_op], feed_dict=feed)
                proc_duration = time.time() - start_time
                train_loss.append(loss_value)
                #pre, rec = zip(*eval_value)
                #print("Pre: ", pre," Rec: ", rec)
                auc = calc_auc_pr(pre, rec)
                #f1 = (2.0 * pre[5] * rec[5]) / (pre[5] + rec[5]) # threshold = 0.5
                f1 =  0.0
                if pre[0] != 0.0 and rec[0] != 0:
                    f1 = (2.0 * pre[0] * rec[0]) / (pre[0] + rec[0])
                train_auc.append(auc)
                train_f1_score.append(f1)

                assert not np.isnan(loss_value), "Model loss is NaN."

                # print log
                if global_step % config['log_step'] == 0:
                    examples_per_sec = batch_size / proc_duration
                    format_str = '%s: step %d/%d, f1 = %.4f, auc = %.4f, loss = %.4f, pre = %.4f, rec = %.4f ' + \
                                 '(%.1f examples/sec; %.3f sec/batch), lr: %.6f'
                    print(format_str % (datetime.now(), global_step, max_steps, f1, auc, loss_value,pre[0], rec[0],
                                        examples_per_sec, proc_duration, current_lr))

                # write summary
                if global_step % config['summary_step'] == 0:

                    # variable histograms (no placeholders in summary_op, so no feed needed)
                    summary_str = sess.run(summary_op)
                    train_summary_writer.add_summary(summary_str, global_step)
                    dev_summary_writer.add_summary(summary_str, global_step)

                    # scalar summaries averaged since the last summary step
                    train_summary_writer.add_summary(
                        _summary_for_scalar('loss', np.mean(train_loss)), global_step=global_step)
                    train_summary_writer.add_summary(
                        _summary_for_scalar('auc', np.mean(train_auc)), global_step=global_step)
                    train_summary_writer.add_summary(
                        _summary_for_scalar('f1', np.mean(train_f1_score)), global_step=global_step)

                    dev_loss, dev_auc, dev_f1 = dev_step(mtest, sess)
                    dev_summary_writer.add_summary(
                        _summary_for_scalar('loss', dev_loss), global_step=global_step)
                    dev_summary_writer.add_summary(
                        _summary_for_scalar('auc', dev_auc), global_step=global_step)
                    dev_summary_writer.add_summary(
                        _summary_for_scalar('f1', dev_f1), global_step=global_step)


                    # NOTE(review): dev_step is invoked a second time here,
                    # repeating the entire dev pass just for the printout.
                    dev_loss, dev_auc, dev_f1 = dev_step(mtest, sess)
                    print("\n===== write summary =====")
                    print("%s: step %d/%d: train_loss = %.6f, train_auc = %.4f, train_f1 = %.4f" \
                          % (datetime.now(), global_step, max_steps,
                             np.mean(train_loss), np.mean(train_auc), np.mean(train_f1_score)))
                    print("%s: step %d/%d:   dev_loss = %.6f,   dev_auc = %.4f,   dev_f1 = %.4f\n" \
                          % (datetime.now(), global_step, max_steps, dev_loss, dev_auc, dev_f1))

                    # reset running averages for the next summary window
                    train_loss = []
                    train_auc = []
                    train_f1_score = []

                # decay learning rate after tolerance_step steps without a new best loss
                if loss_value < lowest_loss_value:
                    lowest_loss_value = loss_value
                    decay_step_counter = 0
                else:
                    decay_step_counter += 1
                if decay_step_counter >= config['tolerance_step']:
                    current_lr *= config['lr_decay']
                    print('%s: step %d/%d, Learning rate decays to %.5f' % \
                          (datetime.now(), global_step, max_steps, current_lr))
                    decay_step_counter = 0

                # stop learning if learning rate is too low
                if current_lr < 1e-5:
                    break

                if global_step % config['checkpoint_step'] == 0:
                    saver.save(sess, save_path, global_step=global_step)
            # final checkpoint after the loop ends (or breaks)
            saver.save(sess, save_path, global_step=global_step)

# Hyper-parameters for the model and training loop.  vocab_size is a
# placeholder here and is filled in from the embedding matrix below.
# num_kernel / min_window / max_window are passed into Model but unused by
# its Bi-LSTM graph.
config = {'batch_size':128,'num_epochs' : 2,'emb_size':300,'num_kernel':100,'min_window': 3,
      'max_window':5,'vocab_size':0,'num_classes':150,
      'sent_len':600,'l2_reg': 1e-4,'attention': False,'multi_label': True, 'optimizer':'adam','dropout':0.5,
      'use_pretrain': True, 'init_lr' : 1e-3,'summary_step': 150,'lr_decay' : 0.95,'checkpoint_step':8000,
      'tolerance_step':500,'log_step':25, 'thresholdVal': 0.5, 'ModelOutput':'ModelOutput_{0}-{1}',
      'minlblIndex':0,'maxlblIndex':150}

# Input pipeline: load the pickled lookups on the CPU, keep only keys that
# appear in both the input and label dicts, shuffle, and split 99% / 1%
# into train / dev keys.
with tf.device('/cpu:0'):
    # NOTE(review): pickle.load is only safe on trusted, locally produced
    # files; never unpickle data from an untrusted source.
    with open('shortListedEmbeddingMatrix_Full.pk', 'rb') as fin:
        embeddingWordMatrix = pickle.load(fin)

    with open('InputEmbIdWithKey.pk', 'rb') as fin:
        inputIdWithKey = pickle.load(fin)

    with open('ICDLabelFull_Id_WithKey.pk', 'rb') as fin:
        labelIdWithKey = pickle.load(fin)

    inputKey = set(inputIdWithKey.keys())
    labelKey = set(labelIdWithKey.keys())

    # only keys with both an input sequence and labels are usable
    inputData = list(inputKey.intersection(labelKey))

    np.random.shuffle(inputData) # Shuffle the training data

    train_size = int(len(inputData) * 0.99)
    train_key = inputData[:train_size]
    test_key = inputData[train_size:]

    # real vocabulary size is known only after loading the embeddings
    config['vocab_size'] = len(embeddingWordMatrix.get('word_vocab'))

print('Training in progress ...')
train(inputIdWithKey,labelIdWithKey,train_key, test_key, config, embeddingWordMatrix)

nvidia-smi and epoch progress:

...