TensorFlow 2.0 network does not reduce the loss in a custom training loop
0 votes
April 13, 2020

I am trying to build a ResNet18 in TensorFlow 2.0.

But the network I built does not reduce the loss, so it does not learn.

I followed the official TensorFlow 2.0 tutorials to build this network.

Why does this network fail to reduce the loss?

I have confirmed that the network does train when I use the model.fit method.
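For reference, the model.fit check looked roughly like this (a rough sketch using the classes and dataset defined in the code below; the exact optimizer settings and epoch count here are placeholders, not necessarily what I ran):

model_for_fit = resnet_18(class_num=10)
model_for_fit.compile(optimizer=tf.keras.optimizers.Adam(),
                      loss=tf.keras.losses.CategoricalCrossentropy(),
                      metrics=["accuracy"])
# steps_per_epoch is required because ds_train calls .repeat() and never ends on its own
model_for_fit.fit(ds_train,
                  epochs=5,
                  steps_per_epoch=ds_info.splits["train"].num_examples // BATCH_SIZE)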

The code is below. My TensorFlow version is 2.0.2.

import tensorflow as tf 
import tensorflow_datasets as tfds
from tensorflow.keras.layers import Dense,Conv2D, Input, MaxPool2D, BatchNormalization, Add, Activation, GlobalMaxPooling2D
import datetime

####################################### set memory growth ######################################
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if len(physical_devices) > 0:
    for k in range(len(physical_devices)):
        tf.config.experimental.set_memory_growth(physical_devices[k], True)
        print('memory growth:', tf.config.experimental.get_memory_growth(physical_devices[k]))
else:
    print("Not enough GPU hardware devices available")
################################################################################################

####################################### dataset preparation ####################################
AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 128


def resize_crop_image_and_onehot(ds):
    # despite the name, this only rescales the image and one-hot encodes the label
    image = tf.cast(ds["image"], tf.float32) / 255.0
    class_num = 10
    label = tf.one_hot(indices=[ds["label"]], depth=class_num, on_value=1, off_value=0, axis=1)
    label = tf.squeeze(label)
    return image, label

def load_ds(name, batch_size):
    (ds_train, ds_test), ds_info = tfds.load(name,
                                             split=["train", "test"],
                                             with_info=True)
    ds_train = ds_train.map(resize_crop_image_and_onehot, num_parallel_calls=AUTOTUNE)
    ds_test = ds_test.map(resize_crop_image_and_onehot, num_parallel_calls=AUTOTUNE)
    # .repeat() makes both pipelines infinite, so any loop over them needs an explicit step count
    ds_train = ds_train.shuffle(ds_info.splits["train"].num_examples).batch(batch_size).repeat().prefetch(buffer_size=AUTOTUNE)
    ds_test = ds_test.shuffle(ds_info.splits["test"].num_examples).batch(batch_size).repeat().prefetch(buffer_size=AUTOTUNE)

    return ds_train, ds_test, ds_info

ds_train, ds_test, ds_info = load_ds("cifar10", batch_size=BATCH_SIZE)
###############################################################################################
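#### optional sanity check (an illustrative snippet; the shapes are what I expect from the pipeline above):
#### one batch should yield float32 images of shape (BATCH_SIZE, 32, 32, 3)
#### and one-hot labels of shape (BATCH_SIZE, 10)
# for img, lab in ds_train.take(1):
#     print(img.shape, img.dtype, lab.shape, lab.dtype)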


#######################################  ResNet18 model  ########################################
class Conv_stage1_block(tf.keras.Model):
    def __init__(self, filters, strides=2, mode="2D", norm="BatchNorm", kernel_initializer='he_normal', name=None):
        super(Conv_stage1_block, self).__init__(name=name)
        # standard ResNet stem: 7x7 conv + BN + ReLU + 3x3 max-pool
        self.conv1 = Conv2D(filters, kernel_size=7, strides=strides, kernel_initializer=kernel_initializer, padding='same')
        self.bn1 = BatchNormalization()
        self.act1 = Activation('relu')
        self.pool1 = MaxPool2D(pool_size=3, strides=2, padding="same")

    def call(self, x): 
        h = self.conv1(x)
        h = self.bn1(h)
        h = self.act1(h)
        output = self.pool1(h)

        return output

class Conv_block(tf.keras.Model):
    def __init__(self, filters, kernel_size=3, strides=2, mode="2D", norm="BatchNorm", kernel_initializer='he_normal', name=None):
        super(Conv_block, self).__init__(name=name)
        filters1, filters2 = filters
        self.bn1 = BatchNormalization()
        self.relu1 = Activation('relu')
        # note: conv1 is a strided 1x1 convolution here (the kernel_size argument is only used for conv2)
        self.conv1 = Conv2D(filters1, 1, strides=strides, kernel_initializer=kernel_initializer, padding='same')
        self.bn2 = BatchNormalization()
        self.relu2 = Activation('relu')
        self.conv2 = Conv2D(filters2, kernel_size, kernel_initializer=kernel_initializer, padding='same')
        self.add = Add()

        # projection shortcut for the strided (downsampling) path
        self.s_bn = BatchNormalization()
        self.s_conv = Conv2D(filters2, 1, strides=strides, kernel_initializer=kernel_initializer, padding='same')

        self.relu = Activation('relu')

    def call(self, x):
        residual = x
        h = self.conv1(x)
        h = self.bn1(h)
        h = self.relu1(h)
        h = self.conv2(h)
        h = self.bn2(h)
        h = self.relu2(h)

        residual = self.s_conv(residual)
        residual = self.s_bn(residual)

        output = self.add([residual, h])
        output = self.relu(output)
        return output

class Identity_block(tf.keras.Model):
    def __init__(self, filters=None, kernel_size=3, mode="2D", norm="BatchNorm", kernel_initializer='he_normal', name=None):
        super(Identity_block, self).__init__(name=name)
        filters1, filters2 = filters
        self.bn1 = BatchNormalization()
        self.relu1 = Activation('relu')
        self.conv1 = Conv2D(filters1, kernel_size, kernel_initializer=kernel_initializer, padding='same')
        self.bn2 = BatchNormalization()
        self.relu2 = Activation('relu')
        self.conv2 = Conv2D(filters2, kernel_size, kernel_initializer=kernel_initializer, padding='same')
        self.add = Add()

        self.relu = Activation('relu')

    def call(self, x):
        residual = x
        h = self.conv1(x)
        h = self.bn1(h)
        h = self.relu1(h)
        h = self.conv2(h)
        h = self.bn2(h)
        h = self.relu2(h)
        # Merge
        output = self.add([residual, h])
        output = self.relu(output)

        return output

class Fin_layer(tf.keras.Model):
    def __init__(self, class_num=1000, include_top=True, name=None):
        super(Fin_layer, self).__init__(name=name)
        self.include_top = include_top
        self.gp = GlobalMaxPooling2D()  # note: global *max* pooling; the reference ResNet uses global average pooling
        if self.include_top:
            # the second positional argument of Dense is the activation, so the output goes through softmax
            self.dense = Dense(class_num, "softmax")

    def call(self, x):
        output = self.gp(x)
        if self.include_top:
            output = self.dense(output)

        return output

class resnet_18(tf.keras.Model):
    def __init__(self, class_num=1000, include_top=True):
        super(resnet_18, self).__init__()
        filters = [[64, 64], [128, 128], [256, 256], [512, 512]]

        self.conv1 = Conv_stage1_block(filters=filters[0][0])

        self.conv_2_1 = Conv_block(filters=filters[0], strides=1)  # stage 2 keeps stride 1; the stem already downsampled
        self.conv_2_2 = Identity_block(filters=filters[0])

        self.conv_3_1 = Conv_block(filters=filters[1])
        self.conv_3_2 = Identity_block(filters=filters[1])

        self.conv_4_1 = Conv_block(filters=filters[2])
        self.conv_4_2 = Identity_block(filters=filters[2])

        self.conv_5_1 = Conv_block(filters=filters[3])
        self.conv_5_2 = Identity_block(filters=filters[3])

        self.fin = Fin_layer(class_num=class_num,include_top=include_top)


    def call(self, x):
        h = self.conv1(x)

        h = self.conv_2_1(h)
        h = self.conv_2_2(h)

        h = self.conv_3_1(h)
        h = self.conv_3_2(h)

        h = self.conv_4_1(h)
        h = self.conv_4_2(h)

        h = self.conv_5_1(h)
        h = self.conv_5_2(h)

        output = self.fin(h)


        return output
###############################################################################################


model = resnet_18(class_num=10)
model(Input((32, 32, 3)))  # call once on a symbolic input so all weights get built

#######################################  for train  ###########################################
# loss definition: categorical crossentropy that expects raw logits
loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

def loss(model, x, y):
    y_ = model(x)
    return loss_object(y_true=y, y_pred=y_)

@tf.function
def grad(model, inputs, targets):
    with tf.GradientTape() as tape:
        loss_value = loss_object(targets, model(inputs))
    return loss_value, tape.gradient(loss_value, model.trainable_variables)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.1)

#### test one iteration
for image, label in ds_train.take(1):
    loss_value, grads = grad(model, image, label)
    print("Step: {}, Initial Loss: {}".format(optimizer.iterations.numpy(),
                                              loss(model, image, label).numpy()))
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    print("Step: {},         Loss: {}".format(optimizer.iterations.numpy(),
                                              loss(model, image, label).numpy()))


####training loop
train_loss_results = []
train_accuracy_results = []
num_epochs = 201
i = 0

for epoch in range(num_epochs):
    epoch_loss_avg = tf.keras.metrics.Mean()
    # note: this metric computes categorical crossentropy, not accuracy,
    # so the "Accuracy" column in the log below is actually a crossentropy value
    epoch_accuracy = tf.keras.metrics.CategoricalCrossentropy()

    # ds_train calls .repeat(), so this inner loop never reaches the end of an epoch
    for x, y in ds_train:
        loss_value, grads = grad(model, x, y)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        epoch_loss_avg(loss_value)
        epoch_accuracy(y, model(x))

        print("iter: {:03d}, Loss: {:.3f}, Accuracy: {:.3f}".format(
                                                            i,
                                                            epoch_loss_avg.result(),
                                                            epoch_accuracy.result()),
                                                            datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
            )
        i += 1

    train_loss_results.append(epoch_loss_avg.result())
    train_accuracy_results.append(epoch_accuracy.result())
    if epoch % 1 == 0:
            print("Epoch: {:03d}, Loss: {:.3f}, Accuracy: {:.3f}".format(
                                                            epoch,
                                                            epoch_loss_avg.result(),
                                                            epoch_accuracy.result()),
                                                            datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
            )

Output:

2020-04-14 00:51:55.380906: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libMIOpen.so
Step: 0, Initial Loss: 2.348322629928589
Step: 1,         Loss: 2.3595874309539795
iter: 000, Loss: 2.399, Accuracy: 15.111 20200414-005208
iter: 001, Loss: 2.371, Accuracy: 14.670 20200414-005208
iter: 002, Loss: 2.362, Accuracy: 14.523 20200414-005208
iter: 003, Loss: 2.363, Accuracy: 14.544 20200414-005208
iter: 004, Loss: 2.361, Accuracy: 14.506 20200414-005208
iter: 005, Loss: 2.363, Accuracy: 14.544 20200414-005208
iter: 006, Loss: 2.362, Accuracy: 14.517 20200414-005209
iter: 007, Loss: 2.363, Accuracy: 14.544 20200414-005209
iter: 008, Loss: 2.365, Accuracy: 14.565 20200414-005209
iter: 009, Loss: 2.365, Accuracy: 14.506 20200414-005209
iter: 010, Loss: 2.367, Accuracy: 14.550 20200414-005209
iter: 011, Loss: 2.365, Accuracy: 14.523 20200414-005209
iter: 012, Loss: 2.366, Accuracy: 14.539 20200414-005209
iter: 013, Loss: 2.366, Accuracy: 14.535 20200414-005209
iter: 014, Loss: 2.362, Accuracy: 14.481 20200414-005209
iter: 015, Loss: 2.361, Accuracy: 14.465 20200414-005209
iter: 016, Loss: 2.361, Accuracy: 14.474 20200414-005209
iter: 017, Loss: 2.357, Accuracy: 14.397 20200414-005210
iter: 018, Loss: 2.358, Accuracy: 14.415 20200414-005210
iter: 019, Loss: 2.360, Accuracy: 14.450 20200414-005210
iter: 020, Loss: 2.359, Accuracy: 14.439 20200414-005210
iter: 021, Loss: 2.359, Accuracy: 14.441 20200414-005210
iter: 022, Loss: 2.358, Accuracy: 14.432 20200414-005210
iter: 023, Loss: 2.359, Accuracy: 14.450 20200414-005210
iter: 024, Loss: 2.360, Accuracy: 14.456 20200414-005210
iter: 025, Loss: 2.358, Accuracy: 14.428 20200414-005210
iter: 026, Loss: 2.357, Accuracy: 14.416 20200414-005210
iter: 027, Loss: 2.357, Accuracy: 14.418 20200414-005210
iter: 028, Loss: 2.357, Accuracy: 14.420 20200414-005211
iter: 029, Loss: 2.357, Accuracy: 14.418 20200414-005211
iter: 030, Loss: 2.358, Accuracy: 14.436 20200414-005211