Tensorflow: multi-step decay function output returns TypeError
1 vote
/ June 15, 2019

We are trying to write a multi-step decay function in Tensorflow using tf.train.piecewise_constant(), as suggested here. The Tensorflow documentation here states:

"When eager execution is enabled, this function returns a function which in turn returns the decayed learning rate Tensor."

However, when we try to run the code, it returns a TypeError. The same error is returned even when lr() is used.
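
For context, here is a minimal, standalone sketch of the documented eager-mode behaviour (assuming the TF 1.13-era API; the toy boundaries and values are ours, not taken from the model below):

import tensorflow as tf
tf.enable_eager_execution()

step = tf.train.get_or_create_global_step()
# plain Python lists for boundaries and values
lr = tf.train.piecewise_constant(step, boundaries=[20, 50], values=[0.1, 0.01, 0.001])
print(lr())  # with eager execution enabled, piecewise_constant returns a callable that yields the LR tensor

Our full script follows: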

import tensorflow as tf
tf.enable_eager_execution()
import numpy as np

def conv3x3(out_planes, data_format='channels_last', stride=1, padding='same', dilation=1, name=None, use_bias=False):
    """3x3 convolution with padding"""
    return tf.keras.layers.Conv2D(filters=out_planes, kernel_size=3, data_format=data_format,
                                  strides=(stride, stride), padding=padding, use_bias=use_bias,
                                  dilation_rate=(dilation, dilation), kernel_initializer=tf.initializers.he_normal(), name=name)


def conv1x1(out_planes, data_format='channels_last', padding='same', stride=1):
    """1x1 convolution"""
    return tf.keras.layers.Conv2D(filters=out_planes, kernel_size=1, strides=(stride, stride), data_format=data_format,
                                  padding=padding, use_bias=False, kernel_initializer=tf.initializers.he_normal())

class BasicBlock(tf.keras.Model):
    expansion = 1

    def __init__(self, planes=1, stride=1, data_format= 'channels_last', downsample=None,  dilation=(1, 1), residual=True, key=None, stage = None):
        super(BasicBlock, self).__init__()
        self.data_format = data_format
        bn_axis = 1 if self.data_format == 'channels_first' else 3
        self.conv1 = conv3x3(out_planes= planes, stride = stride, padding='same' ,
                             data_format = self.data_format, dilation=dilation[0], name = '{}_{}_conv0'.format(key,stage))

        self.bn1 = tf.keras.layers.BatchNormalization(axis=bn_axis, name = '{}_{}_BN0'.format(key,stage))

        self.conv2 = conv3x3(out_planes =planes, padding='same',
                             data_format = self.data_format, dilation=dilation[0],name = '{}_{}_conv1'.format(key,stage))

        self.bn2 = tf.keras.layers.BatchNormalization(axis=bn_axis,name = '{}_{}_BN1'.format(key,stage))

        self.downsample = downsample
        self.relu = tf.keras.layers.ReLU(name = '{}_{}_Relu'.format(key,stage))
        self.stride = stride
        self.residual = residual

    def get_config(self):
        base_config = {}
        base_config['conv1'] = self.conv1.get_config()
        base_config['bn1'] = self.bn1.get_config()
        base_config['conv2'] = self.conv2.get_config()
        base_config['bn2'] = self.bn2.get_config()
        if self.downsample is not None:
            base_config['downsample'] = self.downsample.get_config()
        return base_config


    def call(self, inputs, training=None):
        residual = inputs
        out = self.conv1(inputs)
        out = self.bn1(out,training = training)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(inputs)
        if self.residual:
            out += residual
        out = self.relu(out)
        return out


class Bottleneck(tf.keras.Model):
    expansion = 4

    def __init__(self, planes, stride=1, data_format = 'channels_last',downsample=None,dilation=(1, 1)):
        super(Bottleneck, self).__init__()

        bn_axis = 1 if data_format == 'channels_first' else 3
        self.conv1 = conv1x1(planes, data_format = data_format)
        self.bn1 = tf.keras.layers.BatchNormalization(axis=bn_axis)
        self.relu = tf.keras.layers.ReLU()
        self.conv2 = conv3x3(out_planes=planes, stride=stride, padding='same', use_bias=False, data_format=data_format, dilation=dilation[1])
        self.bn2 = tf.keras.layers.BatchNormalization(axis=bn_axis)
        self.conv3 = conv1x1(planes * 4, data_format=data_format)
        self.bn3 = tf.keras.layers.BatchNormalization(axis=bn_axis)  # equivalent of nn.BatchNorm2d(planes * self.expansion)
        self.downsample = downsample
        self.stride = stride

    def get_config(self):
        base_config = {}
        base_config['conv1'] = self.conv1.get_config()
        base_config['bn1'] = self.bn1.get_config()
        base_config['conv2'] = self.conv2.get_config()
        base_config['bn2'] = self.bn2.get_config()
        base_config['conv3'] = self.conv3.get_config()
        base_config['bn3'] = self.bn3.get_config()
        if self.downsample is not None:
            base_config['downsample'] = self.downsample.get_config()
        return base_config



    def call(self, inputs, training=None):
        identity = inputs
        out = self.conv1(inputs)
        out = self.bn1(out,training = training)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out,training = training)
        out = tf.nn.relu(out)
        out = self.conv3(out)
        out = self.bn3(out,training = training)
        if self.downsample is not None:
            identity = self.downsample(inputs)
        out += identity
        out = self.relu(out)
        return out

class pooling(tf.keras.Model):
    def __init__(self, pool_size, stride=None, data_format='channels_last'):
        super(pooling, self).__init__()
        self.pool_size = pool_size
        self.data_format = data_format
        if stride is None:
            self.stride = self.pool_size
        else:
            self.stride = stride

    def call(self, inputs):
        return tf.layers.average_pooling2d(inputs, strides=self.stride, pool_size=self.pool_size, data_format=self.data_format)


class DRN(tf.keras.Model):
    def __init__(self, block, layers, data_format='channels_last', num_classes=7,channels=(16, 32, 64, 128, 256, 512, 512, 512),
                 out_map=False, out_middle=False, pool_size=28, arch='D'):
        super(DRN, self).__init__()
        self.inplanes = channels[0]
        self.out_map = out_map
        self.out_dim = channels[-1]
        self.out_middle = out_middle
        self.arch = arch
        self.poolsize = pool_size
        self.data_format = data_format
        self.bn_axis = 1 if data_format == 'channels_first' else 3

        self.conv0 = tf.keras.layers.Conv2D(filters=channels[0], kernel_size=7, strides=1,  padding='same',
                                               use_bias=False, data_format = self.data_format, kernel_initializer=tf.initializers.he_normal(), name ='L0_conv0' )
        self.bn0 = tf.keras.layers.BatchNormalization(axis=self.bn_axis,name ='L0_BN0')
        self.relu0 = tf.keras.layers.ReLU(name ='L0_Relu0')


        if arch == 'C':
            self.layer1 = self._make_layer(block = BasicBlock, planes = channels[0], blocks = layers[0], stride=1, data_format = self.data_format, key='CL1')
            self.layer2 = self._make_layer(block = BasicBlock, planes =  channels[1], blocks = layers[1], stride=2, data_format = self.data_format, key='CL2')
        elif arch == 'D':
            self.layer1 = self._make_conv_layers(channels = channels[0],convs = layers[0], stride=1, data_format = self.data_format, key='DL1')
            self.layer2 = self._make_conv_layers(channels = channels[1],convs = layers[1], stride=2, data_format = self.data_format, key='DL2')


        self.layer3 = self._make_layer(block = block, planes = channels[2], blocks = layers[2], stride=2, data_format = self.data_format, key='L3')
        self.layer4 = self._make_layer(block = block, planes = channels[3], blocks = layers[3], stride=2, data_format = self.data_format, key='L4')
        self.layer5 = self._make_layer(block = block, planes = channels[4], blocks = layers[4], dilation=2, new_level=False, data_format = self.data_format, key='L5')
        self.layer6 = None if layers[5] == 0 else self._make_layer(block, channels[5], layers[5], dilation=4, new_level=False, data_format = self.data_format, key='L6')

        if arch == 'C':
            self.layer7 = None if layers[6] == 0 else self._make_layer(BasicBlock, channels[6], layers[6], dilation=2, new_level=False, residual=False, data_format = self.data_format, key='CL7')
            self.layer8 = None if layers[7] == 0 else self._make_layer(BasicBlock, channels[7], layers[7], dilation=1, new_level=False, residual=False, data_format = self.data_format, key='CL8')
        elif arch == 'D':
            self.layer7 = None if layers[6] == 0 else self._make_conv_layers(channels[6], layers[6], dilation=2, data_format = self.data_format, key='DL7')
            self.layer8 = None if layers[7] == 0 else self._make_conv_layers(channels[7], layers[7], dilation=1, data_format = self.data_format, key='DL8')

        if num_classes > 0:
            self.avgpool = tf.keras.layers.GlobalAveragePooling2D(data_format = self.data_format)
            self.fc = tf.keras.layers.Dense(units=num_classes)


    def _make_layer(self, block, planes, blocks, stride=1,dilation=1, new_level=True, data_format = 'channels_last', residual=True, key=None):
        assert dilation == 1 or dilation % 2 == 0
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = tf.keras.Sequential([conv1x1(out_planes = planes * block.expansion,stride = stride, data_format = data_format),
                      tf.keras.layers.BatchNormalization(axis=self.bn_axis)], name = 'downsample')

#
        layers = []
        layers.append(block(planes= planes, stride =  stride, downsample = downsample, dilation=(1, 1) if dilation == 1 else (
                dilation // 2 if new_level else dilation, dilation), data_format=data_format, residual=residual, key = key, stage = '0'))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(planes, residual=residual,dilation=(dilation, dilation), data_format=data_format, key = key, stage = i))
        return tf.keras.Sequential(layers, name = key)


    def _make_conv_layers(self, channels, convs, stride=1, dilation=1 ,data_format = 'channels_last', key = None):
        modules = []
        for i in range(convs):
            modules.extend([
                conv3x3(out_planes= channels, stride=stride if i == 0 else 1,
                          padding= 'same' , use_bias=False, dilation=dilation,  data_format = data_format,name ='{}_{}_Conv'.format(key,i)),
                tf.keras.layers.BatchNormalization(axis=self.bn_axis,name ='{}_{}_BN'.format(key,i)),
                tf.keras.layers.ReLU(name ='{}_{}_Relu'.format(key,i))])
            self.inplanes = channels
        return tf.keras.Sequential(modules,name=key)


    def call(self, x, training=None):
        x = self.conv0(x)
        x = self.bn0(x,training = training)
        x = self.relu0(x)
        x = self.layer1(x,training = training)
        x = self.layer2(x,training = training)
        x = self.layer3(x,training = training)
        x = self.layer4(x,training = training)
        x = self.layer5(x,training = training)

        if self.layer6 is not None:
            x = self.layer6(x,training = training)

        if self.layer7 is not None:
            x = self.layer7(x)
        if self.layer8 is not None:
            x = self.layer8(x)
        if self.out_map:
            x = self.fc(x)
        else:
            x = self.avgpool(x)
            x = self.fc(x)
        return x

def loss(logits, labels):
  return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))

def make_scheduler(policy, init_lr, n_step_epoch, global_step):
    total_steps= n_step_epoch * 10 #10 epochs
    milestones = policy.split('_')
    milestones.pop(0)
    milestones = list(map(lambda x: int(x), milestones))
    boundaries = np.multiply(milestones,n_step_epoch)
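    # note: np.multiply returns a numpy.ndarray here, not a Python list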
    values = [init_lr] + [init_lr * (0.1 ** i) for i in range(1, len(milestones) + 1)]
    learning_rate = tf.train.piecewise_constant(global_step, boundaries, values)
    return learning_rate


def train(model, optimizer, step_counter ):
  """Trains model on `dataset` using `optimizer`."""

  for i in range(10):
      print('Training Loop {}'.format(i))
      images = tf.random.uniform((4, 224, 224,3))
      labels = tf.constant(np.random.randint(4, size=4))
      with tf.contrib.summary.record_summaries_every_n_global_steps(10, global_step=step_counter):
          with tf.GradientTape() as tape:
            logits = model(images, training=True)
            loss_value = loss(logits, labels)
          grads = tape.gradient(loss_value, model.variables)
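          # apply_gradients is where the TypeError surfaces: under eager execution the optimizer calls learning_rate() in _prepare()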
          optimizer.apply_gradients(zip(grads, model.variables), global_step=step_counter)


def test(model):
  """Perform an evaluation of `model` on the examples from `dataset`."""
  for i in range(10):
    images = tf.random.uniform((4, 225, 225,3))
    logits = model(images, training=False)
    print(logits)

def main():
    model =  DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='C',num_classes = 4)
    device = '/gpu:0'
    step_counter = tf.train.get_or_create_global_step()
    lr = make_scheduler(policy='multistep_2_5',init_lr=0.1,n_step_epoch = 10,global_step= step_counter)
    optimizer = tf.train.MomentumOptimizer(lr,momentum=0.5)

    with tf.device(device):
        for _ in range(10):
           train(model, optimizer,step_counter)
           print(optimizer._lr_t)
           test(model)

if __name__ == '__main__':
  main()

Файл "", строка 1, в runfile ('/ home / srijith / work / Tensorflow / SkinCaner_tensorflow / debug / stackoverflow.py', wdir = '/ home / srijith / work / Tensorflow / SkinCaner_tensorflow / debug')

Файл "/home/srijith/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py", строка 709, в runfile execfile (имя файла, пространство имен)

Файл "/home/srijith/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py", строка 108, в execfile exec (compile (f.read (), filename, 'exec'), пространство имен)

Файл "/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py", строка 311, в Основной ()

Файл "/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py", строка 305, в основном поезд (модель, оптимизатор, счетчик шагов)

Файл "/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py", строка 284, в поезде optimizer.apply_gradients (zip (grads, model.variables), global_step = step_counter)

Файл "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/optimizer.py", строка 598, в apply_gradients self._prepare ()

Файл "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/momentum.py", строка 87, в _prepare learning_rate = learning_rate ()

Файл "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/learning_rate_decay_v2.py", строка 171, в decayed_lr границы = ops.convert_n_to_tensor (границы)

Файл "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", строка 1273, в convert_n_to_tensor as_ref = False)

Файл "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", строка 1228, в internal_convert_n_to_tensor поднять TypeError («значения должны быть списком.»)

TypeError: значения должны быть списком.

The code works as expected when we provide a constant learning rate. Are we missing something?
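
For reference, a minimal sketch of the constant-rate variant that runs without error (the 0.1 here is just a value we happened to test with, not anything prescribed):

optimizer = tf.train.MomentumOptimizer(learning_rate=0.1, momentum=0.5)  # plain float instead of the scheduler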
