Я пытаюсь создать ResNet18 на TensorFlow 2.0
Но сеть, которую я построил, не уменьшает потери, так что эта сеть не может работать.
Я ссылался на официальные учебные пособия tenorflow2.0 для построения этой сети.
Почему эта сеть не может уменьшить потери?
Я подтвердил, что эта сеть может работать с model.fit
методом.
ниже кодов. И моя версия TensorFlow 2.0.2
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.layers import Dense,Conv2D, Input, MaxPool2D, BatchNormalization, Add, Activation, GlobalMaxPooling2D
import datetime
####################################### set memory growth ######################################
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
for k in range(len(physical_devices)):
tf.config.experimental.set_memory_growth(physical_devices[k], True)
print('memory growth:', tf.config.experimental.get_memory_growth(physical_devices[k]))
else:
print("Not enough GPU hardware devices available")
################################################################################################
####################################### dataset preparation#####################################
AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 128
def resize_crop_image_and_onehot(ds):
image = ds["image"]
image /= 255
class_num = 10
label = tf.one_hot(indices=[ds["label"]], depth=class_num, on_value=1, off_value=0, axis=1)
label = tf.squeeze( label,
#axis: Optional[List[int]]=None,
#name=None,
#squeeze_dims=None
)
return image, label
def load_ds(name, batch_size):
(ds_train, ds_test), ds_info = tfds.load(name,
split=["train", "test"],
#shuffle_files=True,
with_info=True)
ds_train = ds_train.map(resize_crop_image_and_onehot, num_parallel_calls = AUTOTUNE)
ds_test = ds_test.map(resize_crop_image_and_onehot, num_parallel_calls = AUTOTUNE)
ds_train = ds_train.shuffle(ds_info.splits["train"].num_examples).batch(batch_size).repeat().prefetch(buffer_size=AUTOTUNE)
ds_test = ds_test.shuffle(ds_info.splits["test"].num_examples).batch(batch_size).repeat().prefetch(buffer_size=AUTOTUNE)
return ds_train, ds_test, ds_info
ds_train, ds_test ,ds_info = load_ds("cifar10",batch_size=BATCH_SIZE)
###############################################################################################
####################################### ResNet18 model ########################################
class Conv_stage1_block(tf.keras.Model):
def __init__(self, filters, strides=2, mode="2D", norm="BatchNorm",kernel_initializer='he_normal',name=None):
super(Conv_stage1_block, self).__init__(name=name)
self.conv1 = Conv2D(filters, kernel_size=7,strides=strides,kernel_initializer=kernel_initializer, padding='same')
self.bn1 = BatchNormalization()
self.act1 = Activation('relu')
self.pool1 = MaxPool2D(pool_size=3, strides=2,padding="same")
def call(self, x):
h = self.conv1(x)
h = self.bn1(h)
h = self.act1(h)
output = self.pool1(h)
return output
class Conv_block(tf.keras.Model):
def __init__(self, filters,kernel_size=3, strides=2, mode="2D", norm="BatchNorm",kernel_initializer='he_normal' , name=None):
super(Conv_block, self).__init__(name=name)
filters1, filters2 = filters
self.bn1 = BatchNormalization()
self.relu1 = Activation('relu')
self.conv1 = Conv2D(filters1, 1, strides = strides, kernel_initializer=kernel_initializer,padding='same')
self.bn2 = BatchNormalization()
self.relu2 = Activation('relu')
self.conv2 = Conv2D(filters2, kernel_size, kernel_initializer=kernel_initializer,padding='same')
self.add = Add()
self.s_bn = BatchNormalization()
self.s_conv = Conv2D(filters2, 1, strides=strides, kernel_initializer=kernel_initializer,padding='same')
self.relu = Activation('relu')
def call(self, x):
residual = x
h = self.conv1(x)
h = self.bn1(h)
h = self.relu1(h)
h = self.conv2(h)
h = self.bn2(h)
h = self.relu2(h)
residual = self.s_conv(residual)
residual = self.s_bn(residual)
output = self.add([residual, h])
output = self.relu(output)
return output
class Identity_block(tf.keras.Model):
def __init__(self, filters=None,kernel_size=3, mode="2D", norm="BatchNorm",kernel_initializer='he_normal' , name=None):
super(Identity_block, self).__init__(name=name)
#filters1, filters2 = filters
filters1, filters2 = filters
self.bn1 = BatchNormalization()
self.relu1 = Activation('relu')
self.conv1 = Conv2D(filters1, kernel_size, kernel_initializer=kernel_initializer,padding='same')
self.bn2 = BatchNormalization()
self.relu2 = Activation('relu')
self.conv2 = Conv2D(filters2, kernel_size, kernel_initializer=kernel_initializer,padding='same')
self.add = Add()
self.relu = Activation('relu')
def call(self, x):
residual = x
h = self.conv1(x)
h = self.bn1(h)
h = self.relu1(h)
h = self.conv2(h)
h = self.bn2(h)
h = self.relu2(h)
# Merge
output = self.add([residual, h])
output = self.relu(output)
return output
class Fin_layer(tf.keras.Model):
def __init__(self, class_num=1000, include_top=True, name=None):
super(Fin_layer, self).__init__(name=name)
self.include_top = include_top
self.gp = GlobalMaxPooling2D()
if(self.include_top):
self.dense = Dense(class_num,"softmax")
def call(self, x):
output = self.gp(x)
if(self.include_top):
output = self.dense(output)
return output
class resnet_18(tf.keras.Model):
def __init__(self, class_num=1000, include_top=True):
super(resnet_18, self).__init__()
filters = [[64, 64], [128, 128], [256, 256], [512, 512]]
self.conv1 = Conv_stage1_block(filters=filters[0][0])
self.conv_2_1 = Conv_block(filters=filters[0],strides=1)
self.conv_2_2 = Identity_block(filters=filters[0])
self.conv_3_1 = Conv_block(filters=filters[1])
self.conv_3_2 = Identity_block(filters=filters[1])
self.conv_4_1 = Conv_block(filters=filters[2])
self.conv_4_2 = Identity_block(filters=filters[2])
self.conv_5_1 = Conv_block(filters=filters[3])
self.conv_5_2 = Identity_block(filters=filters[3])
self.fin = Fin_layer(class_num=class_num,include_top=include_top)
def call(self, x):
h = self.conv1(x)
h = self.conv_2_1(h)
h = self.conv_2_2(h)
h = self.conv_3_1(h)
h = self.conv_3_2(h)
h = self.conv_4_1(h)
h = self.conv_4_2(h)
h = self.conv_5_1(h)
h = self.conv_5_2(h)
output = self.fin(h)
return output
###############################################################################################
model = resnet_18(class_num=10)
model(Input((32,32,3)))
####################################### for train ###########################################
#loss definition
loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
#@tf.function
def loss(model, x, y):
y_ = model(x)
return loss_object(y_true=y, y_pred=y_)
@tf.function
def grad(model, inputs, targets):
with tf.GradientTape() as tape:
#loss_value = loss(model, inputs, targets)
loss_value = loss_object(targets, model(inputs))
return loss_value, tape.gradient(loss_value, model.trainable_variables)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)
####test one iteration
for image, label in ds_train.take(1):
loss_value, grads = grad(model, image, label)
print("Step: {}, Initial Loss: {}".format(optimizer.iterations.numpy(),
loss(model, image, label).numpy())
)
optimizer.apply_gradients(zip(grads, model.trainable_variables))
print("Step: {}, Loss: {}".format(optimizer.iterations.numpy(),
loss(model, image, label).numpy()))
####training loop
train_loss_results = []
train_accuracy_results = []
num_epochs = 201
i = 0
for epoch in range(num_epochs):
epoch_loss_avg = tf.keras.metrics.Mean()
epoch_accuracy = tf.keras.metrics.CategoricalCrossentropy()
#print("train for {} steps".format(ds_info.splits["train"].num_examples // config.BATCH_SIZE))
for x, y in ds_train:
loss_value, grads = grad(model, x, y)
optimizer.apply_gradients(zip(grads, model.trainable_variables))
epoch_loss_avg(loss_value)
epoch_accuracy(y, model(x))
print("iter: {:03d}, Loss: {:.3f}, Accuracy: {:.3f}".format(
i,
epoch_loss_avg.result(),
epoch_accuracy.result()),
datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
)
i += 1
train_loss_results.append(epoch_loss_avg.result())
train_accuracy_results.append(epoch_accuracy.result())
if epoch % 1 == 0:
print("Epoch: {:03d}, Loss: {:.3f}, Accuracy: {:.3f}".format(
epoch,
epoch_loss_avg.result(),
epoch_accuracy.result()),
datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
)
вывод
2020-04-14 00:51:55.380906: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so
Step: 0, Initial Loss: 2.348322629928589
Step: 1, Loss: 2.3595874309539795
iter: 000, Loss: 2.399, Accuracy: 15.111 20200414-005208
iter: 001, Loss: 2.371, Accuracy: 14.670 20200414-005208
iter: 002, Loss: 2.362, Accuracy: 14.523 20200414-005208
iter: 003, Loss: 2.363, Accuracy: 14.544 20200414-005208
iter: 004, Loss: 2.361, Accuracy: 14.506 20200414-005208
iter: 005, Loss: 2.363, Accuracy: 14.544 20200414-005208
iter: 006, Loss: 2.362, Accuracy: 14.517 20200414-005209
iter: 007, Loss: 2.363, Accuracy: 14.544 20200414-005209
iter: 008, Loss: 2.365, Accuracy: 14.565 20200414-005209
iter: 009, Loss: 2.365, Accuracy: 14.506 20200414-005209
iter: 010, Loss: 2.367, Accuracy: 14.550 20200414-005209
iter: 011, Loss: 2.365, Accuracy: 14.523 20200414-005209
iter: 012, Loss: 2.366, Accuracy: 14.539 20200414-005209
iter: 013, Loss: 2.366, Accuracy: 14.535 20200414-005209
iter: 014, Loss: 2.362, Accuracy: 14.481 20200414-005209
iter: 015, Loss: 2.361, Accuracy: 14.465 20200414-005209
iter: 016, Loss: 2.361, Accuracy: 14.474 20200414-005209
iter: 017, Loss: 2.357, Accuracy: 14.397 20200414-005210
iter: 018, Loss: 2.358, Accuracy: 14.415 20200414-005210
iter: 019, Loss: 2.360, Accuracy: 14.450 20200414-005210
iter: 020, Loss: 2.359, Accuracy: 14.439 20200414-005210
iter: 021, Loss: 2.359, Accuracy: 14.441 20200414-005210
iter: 022, Loss: 2.358, Accuracy: 14.432 20200414-005210
iter: 023, Loss: 2.359, Accuracy: 14.450 20200414-005210
iter: 024, Loss: 2.360, Accuracy: 14.456 20200414-005210
iter: 025, Loss: 2.358, Accuracy: 14.428 20200414-005210
iter: 026, Loss: 2.357, Accuracy: 14.416 20200414-005210
iter: 027, Loss: 2.357, Accuracy: 14.418 20200414-005210
iter: 028, Loss: 2.357, Accuracy: 14.420 20200414-005211
iter: 029, Loss: 2.357, Accuracy: 14.418 20200414-005211
iter: 030, Loss: 2.358, Accuracy: 14.436 20200414-005211