Мы пытаемся написать многошаговую функцию затухания в Tensorflow, используя tf.train.piecewise_constant (), как предложено здесь . Документация Tensorflow здесь гласит:
«Когда активное выполнение включено, эта функция возвращает функцию, которая, в свою очередь, возвращает затраченную скорость обучения Тензор»
Однако, когда мы попытались запустить код, он вернул ошибку TypeError.
Возвращает ту же ошибку, даже когда используется lr ().
import tensorflow as tf
tf.enable_eager_execution()
import numpy as np
def conv3x3(out_planes, data_format ='channels_last', stride=1, padding='same', dilation=1, name = None,use_bias = False):
"""3x3 convolution with padding"""
return tf.keras.layers.Conv2D(filters = out_planes, kernel_size = 3,data_format= data_format,
strides=(stride, stride), padding='same', use_bias=use_bias,
dilation_rate = (dilation,dilation) , kernel_initializer=tf.initializers.he_normal(),name = name)
def conv1x1(out_planes,data_format ='channels_last', padding = 'same', stride=1):
"""1x1 convolution"""
return tf.keras.layers.Conv2D(filters = out_planes, kernel_size = 1, strides=(stride, stride),data_format= data_format,
padding=padding, use_bias=False, kernel_initializer=tf.initializers.he_normal())
class BasicBlock(tf.keras.Model):
expansion = 1
def __init__(self, planes=1, stride=1, data_format= 'channels_last', downsample=None, dilation=(1, 1), residual=True, key=None, stage = None):
super(BasicBlock, self).__init__()
self.data_format = data_format
bn_axis = 1 if self.data_format == 'channels_first' else 3
self.conv1 = conv3x3(out_planes= planes, stride = stride, padding='same' ,
data_format = self.data_format, dilation=dilation[0], name = '{}_{}_conv0'.format(key,stage))
self.bn1 = tf.keras.layers.BatchNormalization(axis=bn_axis, name = '{}_{}_BN0'.format(key,stage))
self.conv2 = conv3x3(out_planes =planes, padding='same',
data_format = self.data_format, dilation=dilation[0],name = '{}_{}_conv1'.format(key,stage))
self.bn2 = tf.keras.layers.BatchNormalization(axis=bn_axis,name = '{}_{}_BN1'.format(key,stage))
self.downsample = downsample
self.relu = tf.keras.layers.ReLU(name = '{}_{}_Relu'.format(key,stage))
self.stride = stride
self.residual = residual
def get_config(self):
base_config = {}
base_config['conv1'] = self.conv1.get_config()
base_config['bn1'] = self.bn1.get_config()
base_config['conv2'] = self.conv2.get_config()
base_config['bn2'] = self.bn2.get_config()
if self.downsample is not None:
base_config['downsample'] = self.downsample.get_config()
return base_config
def call(self, inputs, training=None):
residual = inputs
out = self.conv1(inputs)
out = self.bn1(out,training = training)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(inputs)
if self.residual:
out += residual
out = self.relu(out)
return out
class Bottleneck(tf.keras.Model):
expansion = 4
def __init__(self, planes, stride=1, data_format = 'channels_last',downsample=None,dilation=(1, 1)):
super(Bottleneck, self).__init__()
bn_axis = 1 if data_format == 'channels_first' else 3
self.conv1 = conv1x1(planes, data_format = data_format)
self.bn1 = tf.keras.layers.BatchNormalization(axis=bn_axis)
self.relu = tf.keras.layers.ReLU()
self.conv2 = conv3x3(planes, stride, padding= 'same', bias=False, data_format = data_format, dilation=dilation[1])
self.bn2 = tf.keras.layers.BatchNormalization(axis=bn_axis)
self.conv3 =conv1x1( planes * 4, data_format = data_format, )
self.bn3 = tf.keras.layers.BatchNormalization(axis=bn_axis) # nn.BatchNorm2d(planes * self.expansion)
self.downsample = downsample
self.stride = stride
def get_config(self):
base_config = {}
base_config['conv1'] = self.conv1.get_config()
base_config['bn1'] = self.bn1.get_config()
base_config['conv2'] = self.conv2.get_config()
base_config['bn2'] = self.bn2.get_config()
base_config['conv3'] = self.conv3.get_config()
base_config['bn3'] = self.bn3.get_config()
if self.downsample is not None:
base_config['downsample'] = self.downsample.get_config()
return base_config
def call(self, inputs, training=None):
identity = inputs
out = self.conv1(inputs)
out = self.bn1(out,training = training)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out,training = training)
out = tf.nn.relu(out)
out = self.conv3(out)
out = self.bn3(out,training = training)
if self.downsample is not None:
identity = self.downsample(inputs)
out += identity
out = self.relu(out)
return out
class pooling (tf.keras.Model):
def __init__(self, pool_size, stride = None, data_format='channels_last'):
super(pooling, self).__init__()
self.pool_size = pool_size
self.data_format = data_format
if stride is None:
self.stride =self.pool_size
else:
self.stride = stride
def call(self, inputs):
return tf.layers.average_pooling2d(inputs, strides =self.stride, pool_size = self.pool_size, data_format = self.data_format)
class DRN(tf.keras.Model):
def __init__(self, block, layers, data_format='channels_last', num_classes=7,channels=(16, 32, 64, 128, 256, 512, 512, 512),
out_map=False, out_middle=False, pool_size=28, arch='D'):
super(DRN, self).__init__()
self.inplanes = channels[0]
self.out_map = out_map
self.out_dim = channels[-1]
self.out_middle = out_middle
self.arch = arch
self.poolsize = pool_size
self.data_format = data_format
self.bn_axis = 1 if data_format == 'channels_first' else 3
self.conv0 = tf.keras.layers.Conv2D(filters=channels[0], kernel_size=7, strides=1, padding='same',
use_bias=False, data_format = self.data_format, kernel_initializer=tf.initializers.he_normal(), name ='L0_conv0' )
self.bn0 = tf.keras.layers.BatchNormalization(axis=self.bn_axis,name ='L0_BN0')
self.relu0 = tf.keras.layers.ReLU(name ='L0_Relu0')
if arch == 'C':
self.layer1 = self._make_layer(block = BasicBlock, planes = channels[0], blocks = layers[0], stride=1, data_format = self.data_format, key='CL1')
self.layer2 = self._make_layer(block = BasicBlock, planes = channels[1], blocks = layers[1], stride=2, data_format = self.data_format, key='CL2')
elif arch == 'D':
self.layer1 = self._make_conv_layers(channels = channels[0],convs = layers[0], stride=1, data_format = self.data_format, key='DL1')
self.layer2 = self._make_conv_layers(channels = channels[1],convs = layers[1], stride=2, data_format = self.data_format, key='DL2')
self.layer3 = self._make_layer(block = block, planes = channels[2], blocks = layers[2], stride=2, data_format = self.data_format, key='L3')
self.layer4 = self._make_layer(block = block, planes = channels[3], blocks = layers[3], stride=2, data_format = self.data_format, key='L4')
self.layer5 = self._make_layer(block = block, planes = channels[4], blocks = layers[4], dilation=2, new_level=False, data_format = self.data_format, key='L5')
self.layer6 = None if layers[5] == 0 else self._make_layer(block, channels[5], layers[5], dilation=4, new_level=False, data_format = self.data_format, key='L6')
if arch == 'C':
self.layer7 = None if layers[6] == 0 else self._make_layer(BasicBlock, channels[6], layers[6], dilation=2, new_level=False, residual=False, data_format = self.data_format, key='CL7')
self.layer8 = None if layers[7] == 0 else self._make_layer(BasicBlock, channels[7], layers[7], dilation=1, new_level=False, residual=False, data_format = self.data_format, key='CL8')
elif arch == 'D':
self.layer7 = None if layers[6] == 0 else self._make_conv_layers(channels[6], layers[6], dilation=2, data_format = self.data_format, key='DL7')
self.layer8 = None if layers[7] == 0 else self._make_conv_layers(channels[7], layers[7], dilation=1, data_format = self.data_format, key='DL8')
if num_classes > 0:
self.avgpool = tf.keras.layers.GlobalAveragePooling2D(data_format = self.data_format)
self.fc = tf.keras.layers.Dense(units=num_classes)
def _make_layer(self, block, planes, blocks, stride=1,dilation=1, new_level=True, data_format = 'channels_last', residual=True, key=None):
assert dilation == 1 or dilation % 2 == 0
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = tf.keras.Sequential([conv1x1(out_planes = planes * block.expansion,stride = stride, data_format = data_format),
tf.keras.layers.BatchNormalization(axis=self.bn_axis)], name = 'downsample')
#
layers = []
layers.append(block(planes= planes, stride = stride, downsample = downsample, dilation=(1, 1) if dilation == 1 else (
dilation // 2 if new_level else dilation, dilation), data_format=data_format, residual=residual, key = key, stage = '0'))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(planes, residual=residual,dilation=(dilation, dilation), data_format=data_format, key = key, stage = i))
return tf.keras.Sequential(layers, name = key)
def _make_conv_layers(self, channels, convs, stride=1, dilation=1 ,data_format = 'channels_last', key = None):
modules = []
for i in range(convs):
modules.extend([
conv3x3(out_planes= channels, stride=stride if i == 0 else 1,
padding= 'same' , use_bias=False, dilation=dilation, data_format = data_format,name ='{}_{}_Conv'.format(key,i)),
tf.keras.layers.BatchNormalization(axis=self.bn_axis,name ='{}_{}_BN'.format(key,i)),
tf.keras.layers.ReLU(name ='{}_{}_Relu'.format(key,i))])
self.inplanes = channels
return tf.keras.Sequential(modules,name=key)
def call(self, x, training=None):
x = self.conv0(x)
x = self.bn0(x,training = training)
x = self.relu0(x)
x = self.layer1(x,training = training)
x = self.layer2(x,training = training)
x = self.layer3(x,training = training)
x = self.layer4(x,training = training)
x = self.layer5(x,training = training)
if self.layer6 is not None:
x = self.layer6(x,training = training)
if self.layer7 is not None:
x = self.layer7(x)
if self.layer8 is not None:
x = self.layer8(x)
if self.out_map:
x = self.fc(x)
else:
x = self.avgpool(x)
x = self.fc(x)
return x
def loss(logits, labels):
return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
def make_scheduler(policy, init_lr, n_step_epoch, global_step):
total_steps= n_step_epoch * 10 #10 epochs
milestones = policy.split('_')
milestones.pop(0)
milestones = list(map(lambda x: int(x), milestones))
boundaries = np.multiply(milestones,n_step_epoch)
values = [init_lr] + [init_lr/(0.1**-i) for i in range(1,len(milestones)+1)]
learning_rate = tf.train.piecewise_constant(global_step, boundaries, values)
return learning_rate
def train(model, optimizer, step_counter ):
"""Trains model on `dataset` using `optimizer`."""
for (batch, i) in enumerate(range(10)):
print('Training Loop {}'.format(i))
images = tf.random.uniform((4, 224, 224,3))
labels = tf.constant(np.random.randint(4, size=4))
with tf.contrib.summary.record_summaries_every_n_global_steps(10, global_step=step_counter):
with tf.GradientTape() as tape:
logits = model(images, training=True)
loss_value = loss(logits, labels)
grads = tape.gradient(loss_value, model.variables)
optimizer.apply_gradients(zip(grads, model.variables), global_step=step_counter)
def test(model):
"""Perform an evaluation of `model` on the examples from `dataset`."""
for i in (range(10)):
images = tf.random.uniform((4, 225, 225,3))
logits = model(images, training=False)
print(logits)
def main():
model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='C',num_classes = 4)
device = '/gpu:0'
step_counter = tf.train.get_or_create_global_step()
lr = make_scheduler(policy='multistep_2_5',init_lr=0.1,n_step_epoch = 10,global_step= step_counter)
optimizer = tf.train.MomentumOptimizer(lr,momentum=0.5)
with tf.device(device):
for _ in range(10):
train(model, optimizer,step_counter)
print(optimizer._lr_t)
test(model)
if __name__ == '__main__':
main()
Файл "", строка 1, в
runfile ('/ home / srijith / work / Tensorflow / SkinCaner_tensorflow / debug / stackoverflow.py', wdir = '/ home / srijith / work / Tensorflow / SkinCaner_tensorflow / debug')
Файл "/home/srijith/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py", строка 709, в runfile
execfile (имя файла, пространство имен)
Файл "/home/srijith/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py", строка 108, в execfile
exec (compile (f.read (), filename, 'exec'), пространство имен)
Файл "/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py", строка 311, в
Основной ()
Файл "/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py", строка 305, в основном
поезд (модель, оптимизатор, счетчик шагов)
Файл "/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py", строка 284, в поезде
optimizer.apply_gradients (zip (grads, model.variables), global_step = step_counter)
Файл "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/optimizer.py", строка 598, в apply_gradients
self._prepare ()
Файл "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/momentum.py", строка 87, в _prepare
learning_rate = learning_rate ()
Файл "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/learning_rate_decay_v2.py", строка 171, в decayed_lr
границы = ops.convert_n_to_tensor (границы)
Файл "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", строка 1273, в convert_n_to_tensor
as_ref = False)
Файл "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", строка 1228, в internal_convert_n_to_tensor
поднять TypeError («значения должны быть списком.»)
TypeError: значения должны быть списком.
Код работает, как и ожидалось, когда мы обеспечиваем постоянную скорость обучения. Мы что-то упускаем?