Tensorflow 2.0: результаты тестирования после сохранения/загрузки весов сильно отличаются от результатов обучения/валидации - PullRequest
0 голосов
/ 10 апреля 2020

Я пытался обучить собственную модель Inception-ResNet v2 для периокулярной задачи, и проблема заключается в следующем:

  1. Этапы обучения и валидации выполняются успешно, точность очень высокая: шаги обучения показывают высокую точность

  2. Я сохраняю веса и загружаю их для тестирования, однако классификация при этом полностью не работает. Похоже, веса обновляются неправильно. Тестовые шаги показывают неправильную классификацию

Я не понимаю, почему так происходит. Пожалуйста, помогите мне разобраться, в чём дело, большое спасибо.

Основная функция приведена ниже:

from __future__ import absolute_import, division, print_function
import tensorflow as tf
import math
import os
import datetime
import sys

# User defined packages
from configuration import IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS, \
    EPOCHS, BATCH_SIZE, save_model_root_dir, log_root_dir,  GLOBAL_LEARNING_RATE, \
    WEIGHT_DECAY, THRESHOLD
from prepare_data import generate_datasets, load_and_preprocess_image
from models import mobilenet_v1, mobilenet_v2, mobilenet_v3_large, mobilenet_v3_small, \
    efficientnet, resnext, inception_v4, inception_resnet_v1, inception_resnet_v2, \
    se_resnet, squeezenet, densenet, shufflenet_v2, resnet
from models.model_selection import get_model


def print_model_summary(network):
    """Build ``network`` for the configured input size and print its summary.

    The batch dimension is left as ``None``; height, width and channel count
    come from the ``IMAGE_HEIGHT``, ``IMAGE_WIDTH`` and ``CHANNELS`` settings.

    Args:
        network: Object exposing ``build(input_shape=...)`` and ``summary()``.
    """
    input_shape = (None, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS)
    network.build(input_shape=input_shape)
    network.summary()


def process_features(features, data_augmentation):
    """Turn one batch of dataset records into stacked images and labels.

    Args:
        features: Mapping with ``'image_raw'`` and ``'label'`` entries; both
            values must expose ``.numpy()``.
        data_augmentation: Forwarded unchanged to ``load_and_preprocess_image``
            for every image in the batch.

    Returns:
        ``(images, labels)``: the preprocessed images stacked along axis 0 and
        the labels as returned by ``features['label'].numpy()``.
    """
    raw_images = features['image_raw'].numpy()
    # Preprocess each raw image individually, then stack them into one batch.
    preprocessed = [
        load_and_preprocess_image(raw, data_augmentation=data_augmentation)
        for raw in raw_images
    ]
    images = tf.stack(preprocessed, axis=0)
    labels = features['label'].numpy()

    return images, labels

def folder_preparation(job_id, product_id):
    """Create the log and model folders for a job and (re)write the log headers.

    Both training log files are opened in "w" mode, so any previous content
    for the same job/product is discarded.

    Args:
        job_id: Job identifier (string), used as the first path component.
        product_id: Product identifier (string), used as the second path
            component.

    Returns:
        ``(log_dir, save_model_dir)``: directory paths built from
        ``log_root_dir`` / ``save_model_root_dir``; both end with "/" so a
        file name can be appended directly.
    """
    # Generate the log file path and pre-create the log files with their headers.
    log_dir = log_root_dir + job_id + "/" + product_id + "/"
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    step_columns = ("type", "timestamp", "epoch", "step",
                    "train_accuracy", "predict_labels", "actual_labels")
    # ``with`` guarantees the handle is closed even if a write fails.
    with open(log_dir + "training_result_step" + ".log", "w") as step_log:
        step_log.write("\t".join(step_columns) + "\n")

    epoch_columns = ("timestamp", "epoch", "valid accuracy")
    with open(log_dir + "training_result" + ".log", "w") as epoch_log:
        epoch_log.write("\t".join(epoch_columns) + "\n")

    # Generate the folder the model weights are saved into.
    save_model_dir = save_model_root_dir + job_id + "/" + product_id + "/"
    if not os.path.exists(save_model_dir):
        os.makedirs(save_model_dir)

    return log_dir, save_model_dir

def main(argv):
    """Train the model, log per-step and per-epoch results, save the best weights.

    Weights are written to ``save_model_dir + "model"`` whenever the epoch's
    validation accuracy reaches the current threshold; the threshold then
    rises to that accuracy, so only equal-or-better epochs overwrite it.

    Args:
        argv: Command-line arguments in ``sys.argv`` layout, i.e.
            ``[script_name, job_id, product_id]``. ``job_id`` must consist of
            digits only and ``product_id`` must be alphanumeric.

    Returns:
        None. When the arguments are malformed an error line is printed and
        the function returns without training.
    """
    # The frontend must pass exactly job_id and product_id.
    if len(argv) != 3:
        print("ERROR: Format error, refer to the usage: python train.py job_id product_id")
        return
    if not argv[1].isdigit():
        print("ERROR: Format error, job_id must be in int format")
        return
    if not argv[2].isalnum():
        print("ERROR: Format error, product_id must be consistent by character or number, without special character")
        return

    print("INFO: Start training model " + datetime.datetime.now().strftime("%Y%m%d%H%M%S"))

    # GPU settings: enable memory growth on every visible GPU.
    for gpu in tf.config.list_physical_devices("GPU"):
        tf.config.experimental.set_memory_growth(gpu, True)

    # Folder generation for the log files and the saved model.
    log_dir, save_model_dir = folder_preparation(argv[1], argv[2])
    step_log_path = log_dir + "training_result_step" + ".log"
    epoch_log_path = log_dir + "training_result" + ".log"

    # Get the datasets.
    train_dataset, valid_dataset, test_dataset, train_count, valid_count, test_count = generate_datasets()
    steps_per_epoch = math.ceil(train_count / BATCH_SIZE)

    # Create the model.
    model = get_model()
    print_model_summary(network=model)

    # Weights are only saved once the validation accuracy reaches this value.
    threshold = THRESHOLD

    # Loss calculation.
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

    # RMSprop was tried first and gave worse results, so Adam is used.
    # NOTE(review): in Keras optimizers ``decay`` is a learning-rate time
    # decay, not an L2 weight decay -- confirm WEIGHT_DECAY is meant for that.
    optimizer = tf.keras.optimizers.Adam(learning_rate=GLOBAL_LEARNING_RATE,
                                         decay=WEIGHT_DECAY,
                                         name='adam_optimizer')

    # Training KPIs.
    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    # Validation KPIs.
    valid_loss = tf.keras.metrics.Mean(name='valid_loss')
    valid_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='valid_accuracy')

    # Not wrapped in @tf.function: the step returns NumPy values via .numpy().
    def train(image_batch, label_batch):
        """Run one optimisation step; return (predictions, predicted labels)."""
        with tf.GradientTape() as tape:
            predictions = model(image_batch, training=True)
            # NOTE(review): layer regularization losses (model.losses) are not
            # part of this loss; add them here if regularizers should apply.
            loss = loss_object(y_true=label_batch, y_pred=predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(grads_and_vars=zip(gradients, model.trainable_variables))

        train_loss.update_state(values=loss)
        train_accuracy.update_state(y_true=label_batch, y_pred=predictions)

        return predictions.numpy(), tf.math.argmax(predictions, axis=1).numpy()

    def valid(image_batch, label_batch):
        """Score one validation batch in inference mode; return predicted labels."""
        # training=False: validation must use the same forward pass as the
        # test script, otherwise the accuracy that gates the weight saving
        # does not reflect how the saved model behaves at inference time.
        predictions = model(image_batch, training=False)
        v_loss = loss_object(label_batch, predictions)

        valid_loss.update_state(values=v_loss)
        valid_accuracy.update_state(y_true=label_batch, y_pred=predictions)

        return tf.math.argmax(predictions, axis=1).numpy()

    # Start training.
    for epoch in range(EPOCHS):
        for train_step, features in enumerate(train_dataset, start=1):
            images, labels = process_features(features, data_augmentation=False)
            predictions, predict_labels = train(images, labels)

            # Print the info on the screen for the developer to monitor training.
            print("Epoch: {}/{}, step: {}/{}, loss: {:.5f}, accuracy: {:.5f}, softmax(logits):{}, "
                  "predict_label:{}, target_label:{}".format(epoch,
                                                            EPOCHS,
                                                            train_step,
                                                            steps_per_epoch,
                                                            train_loss.result().numpy(),
                                                            train_accuracy.result().numpy(),
                                                            predictions,
                                                            predict_labels,
                                                            labels))

            # Record the step in the tab-separated step log.
            step_row = [
                "train",
                datetime.datetime.now().strftime("%Y%m%d%H%M%S"),
                str(epoch),
                str(train_step),
                str(train_accuracy.result().numpy()),
                str(predict_labels),
                str(labels),
            ]
            with open(step_log_path, "a") as step_log:
                step_log.write("\t".join(step_row) + "\n")

        for features in valid_dataset:
            valid_images, valid_labels = process_features(features, data_augmentation=False)
            valid(valid_images, valid_labels)

        valid_accuracy_result = valid_accuracy.result().numpy()

        # Print the info on the screen for the developer to monitor validation.
        print("Epoch: {}/{}, train loss: {:.5f}, train accuracy: {:.5f}, "
              "valid loss: {:.5f}, valid accuracy: {:.5f}".format(epoch,
                                                                  EPOCHS,
                                                                  train_loss.result().numpy(),
                                                                  train_accuracy.result().numpy(),
                                                                  valid_loss.result().numpy(),
                                                                  valid_accuracy_result))

        # Tab-separated epoch log, easy to analyse with pandas for model selection.
        epoch_row = [
            datetime.datetime.now().strftime("%Y%m%d%H%M%S"),
            str(epoch),
            str(valid_accuracy_result),
        ]
        with open(epoch_log_path, "a") as epoch_log:
            epoch_log.write("\t".join(epoch_row) + "\n")

        # Metrics accumulate across batches; start every epoch from zero.
        train_loss.reset_states()
        train_accuracy.reset_states()
        valid_loss.reset_states()
        valid_accuracy.reset_states()

        # Save the weights only when the validation accuracy reaches the
        # threshold, i.e. it is the best result so far.
        if valid_accuracy_result >= threshold:
            model.save_weights(filepath=save_model_dir + "model", save_format='tf')

            # Threshold update.
            threshold = valid_accuracy_result

# Script entry point: forward the command-line arguments to main() only when
# this file is executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main(sys.argv)

Построение модели:

import tensorflow as tf
from models.inception_modules import Stem, ReductionA, BasicConv2D, Conv2DLinear
from configuration import NUM_CLASSES, DROPOUT_RATIO, L1_REGULIZER, L2_REGULIZER


class InceptionResNetA(tf.keras.layers.Layer):
    """Residual Inception block with three parallel convolution branches.

    The branch outputs are concatenated on the last axis, projected by a 1x1
    ``Conv2DLinear`` with 384 filters, added to the block input and passed
    through ReLU.
    """

    def __init__(self):
        super(InceptionResNetA, self).__init__()
        # Every convolution in this block uses stride 1 and "same" padding.
        common = dict(strides=1, padding="same")

        # Branch 1: a single 1x1 convolution.
        self.b1_conv = BasicConv2D(filters=32, kernel_size=(1, 1), **common)
        # Branch 2: 1x1 followed by 3x3.
        self.b2_conv1 = BasicConv2D(filters=32, kernel_size=(1, 1), **common)
        self.b2_conv2 = BasicConv2D(filters=32, kernel_size=(3, 3), **common)
        # Branch 3: 1x1 followed by two 3x3 convolutions.
        self.b3_conv1 = BasicConv2D(filters=32, kernel_size=(1, 1), **common)
        self.b3_conv2 = BasicConv2D(filters=48, kernel_size=(3, 3), **common)
        self.b3_conv3 = BasicConv2D(filters=64, kernel_size=(3, 3), **common)
        # Projection applied to the concatenated branches before the residual add.
        self.conv = Conv2DLinear(filters=384, kernel_size=(1, 1), **common)

    def call(self, inputs, training=None, **kwargs):
        """Apply the block to ``inputs``; ``training`` is passed to every conv."""
        branch_1 = self.b1_conv(inputs, training=training)

        branch_2 = self.b2_conv1(inputs, training=training)
        branch_2 = self.b2_conv2(branch_2, training=training)

        branch_3 = self.b3_conv1(inputs, training=training)
        branch_3 = self.b3_conv2(branch_3, training=training)
        branch_3 = self.b3_conv3(branch_3, training=training)

        merged = tf.concat(values=[branch_1, branch_2, branch_3], axis=-1)
        projected = self.conv(merged, training=training)

        # Residual connection; presumably requires ``inputs`` to have the same
        # shape as the projection output -- TODO confirm against Conv2DLinear.
        summed = tf.keras.layers.add([projected, inputs])
        return tf.nn.relu(summed)


class InceptionResNetB(tf.keras.layers.Layer):
    """Residual Inception block with two branches, one using 1x7 / 7x1 kernels.

    The branch outputs are concatenated on the last axis, projected by a 1x1
    ``Conv2DLinear`` with 1152 filters, added to the block input and passed
    through ReLU.
    """

    def __init__(self):
        super(InceptionResNetB, self).__init__()
        # Every convolution in this block uses stride 1 and "same" padding.
        common = dict(strides=1, padding="same")

        # Branch 1: a single 1x1 convolution.
        self.b1_conv = BasicConv2D(filters=192, kernel_size=(1, 1), **common)
        # Branch 2: 1x1, then the 7x7 receptive field factorised as 1x7 + 7x1.
        self.b2_conv1 = BasicConv2D(filters=128, kernel_size=(1, 1), **common)
        self.b2_conv2 = BasicConv2D(filters=160, kernel_size=(1, 7), **common)
        self.b2_conv3 = BasicConv2D(filters=192, kernel_size=(7, 1), **common)
        # Projection applied to the concatenated branches before the residual add.
        self.conv = Conv2DLinear(filters=1152, kernel_size=(1, 1), **common)

    def call(self, inputs, training=None, **kwargs):
        """Apply the block to ``inputs``; ``training`` is passed to every conv."""
        branch_1 = self.b1_conv(inputs, training=training)

        branch_2 = self.b2_conv1(inputs, training=training)
        branch_2 = self.b2_conv2(branch_2, training=training)
        branch_2 = self.b2_conv3(branch_2, training=training)

        merged = tf.concat(values=[branch_1, branch_2], axis=-1)
        projected = self.conv(merged, training=training)

        # Residual connection followed by the block's ReLU.
        summed = tf.keras.layers.add([projected, inputs])
        return tf.nn.relu(summed)


class InceptionResNetC(tf.keras.layers.Layer):
    """Residual Inception block with two branches, one using 1x3 / 3x1 kernels.

    The branch outputs are concatenated on the last axis, projected by a 1x1
    ``Conv2DLinear`` with 2144 filters, added to the block input and passed
    through ReLU.
    """

    def __init__(self):
        super(InceptionResNetC, self).__init__()
        # Every convolution in this block uses stride 1 and "same" padding.
        common = dict(strides=1, padding="same")

        # Branch 1: a single 1x1 convolution.
        self.b1_conv = BasicConv2D(filters=192, kernel_size=(1, 1), **common)
        # Branch 2: 1x1, then 1x3 followed by 3x1.
        self.b2_conv1 = BasicConv2D(filters=192, kernel_size=(1, 1), **common)
        self.b2_conv2 = BasicConv2D(filters=224, kernel_size=(1, 3), **common)
        self.b2_conv3 = BasicConv2D(filters=256, kernel_size=(3, 1), **common)
        # Projection applied to the concatenated branches before the residual add.
        self.conv = Conv2DLinear(filters=2144, kernel_size=(1, 1), **common)

    def call(self, inputs, training=None, **kwargs):
        """Apply the block to ``inputs``; ``training`` is passed to every conv."""
        branch_1 = self.b1_conv(inputs, training=training)

        branch_2 = self.b2_conv1(inputs, training=training)
        branch_2 = self.b2_conv2(branch_2, training=training)
        branch_2 = self.b2_conv3(branch_2, training=training)

        merged = tf.concat(values=[branch_1, branch_2], axis=-1)
        projected = self.conv(merged, training=training)

        # Residual connection followed by the block's ReLU.
        summed = tf.keras.layers.add([projected, inputs])
        return tf.nn.relu(summed)


class ReductionB(tf.keras.layers.Layer):
    """Reduction block: one max-pool branch and three convolution branches.

    Each branch ends in a stride-2, "valid"-padded 3x3 operation; the four
    branch outputs are concatenated on the last axis.
    """

    def __init__(self):
        super(ReductionB, self).__init__()
        # The 1x1 entry convolutions keep resolution; the final 3x3 of every
        # branch is the strided, unpadded one.
        entry = dict(kernel_size=(1, 1), strides=1, padding="same")
        strided = dict(kernel_size=(3, 3), strides=2, padding="valid")

        # Branch 1: max pooling only.
        self.b1_maxpool = tf.keras.layers.MaxPool2D(pool_size=(3, 3),
                                                    strides=2,
                                                    padding="valid")
        # Branch 2: 1x1 -> strided 3x3.
        self.b2_conv1 = BasicConv2D(filters=256, **entry)
        self.b2_conv2 = BasicConv2D(filters=384, **strided)
        # Branch 3: 1x1 -> strided 3x3.
        self.b3_conv1 = BasicConv2D(filters=256, **entry)
        self.b3_conv2 = BasicConv2D(filters=288, **strided)
        # Branch 4: 1x1 -> 3x3 (stride 1, "same") -> strided 3x3.
        self.b4_conv1 = BasicConv2D(filters=256, **entry)
        self.b4_conv2 = BasicConv2D(filters=288,
                                    kernel_size=(3, 3),
                                    strides=1,
                                    padding="same")
        self.b4_conv3 = BasicConv2D(filters=320, **strided)

    def call(self, inputs, training=None, **kwargs):
        """Apply all four branches to ``inputs`` and concatenate their outputs."""
        pooled = self.b1_maxpool(inputs)

        branch_2 = self.b2_conv1(inputs, training=training)
        branch_2 = self.b2_conv2(branch_2, training=training)

        branch_3 = self.b3_conv1(inputs, training=training)
        branch_3 = self.b3_conv2(branch_3, training=training)

        branch_4 = self.b4_conv1(inputs, training=training)
        branch_4 = self.b4_conv2(branch_4, training=training)
        branch_4 = self.b4_conv3(branch_4, training=training)

        branches = [pooled, branch_2, branch_3, branch_4]
        return tf.concat(values=branches, axis=-1)


def build_inception_resnet_a(n):
    """Return a ``tf.keras.Sequential`` made of ``n`` ``InceptionResNetA`` blocks."""
    blocks = [InceptionResNetA() for _ in range(n)]
    return tf.keras.Sequential(blocks)


def build_inception_resnet_b(n):
    """Return a ``tf.keras.Sequential`` made of ``n`` ``InceptionResNetB`` blocks."""
    blocks = [InceptionResNetB() for _ in range(n)]
    return tf.keras.Sequential(blocks)


def build_inception_resnet_c(n):
    """Return a ``tf.keras.Sequential`` made of ``n`` ``InceptionResNetC`` blocks."""
    blocks = [InceptionResNetC() for _ in range(n)]
    return tf.keras.Sequential(blocks)


class InceptionResNetV2(tf.keras.Model):
    """Inception-ResNet classifier assembled from the blocks of this module.

    Pipeline: stem -> 5x block A -> reduction A -> 10x block B -> reduction B
    -> 5x block C -> 8x8 average pooling -> dropout -> flatten -> softmax
    dense layer with ``NUM_CLASSES`` units.
    """

    def __init__(self):
        super(InceptionResNetV2, self).__init__()
        # Convolutional trunk, created in the order it is applied in call().
        self.stem = Stem()
        self.inception_resnet_a = build_inception_resnet_a(5)
        self.reduction_a = ReductionA(k=256, l=256, m=384, n=384)
        self.inception_resnet_b = build_inception_resnet_b(10)
        self.reduction_b = ReductionB()
        self.inception_resnet_c = build_inception_resnet_c(5)

        # Classification head.
        self.avgpool = tf.keras.layers.AveragePooling2D(pool_size=(8, 8))
        self.dropout = tf.keras.layers.Dropout(rate=DROPOUT_RATIO)
        self.flat = tf.keras.layers.Flatten()
        # The layer outputs softmax probabilities (not logits); L1 is applied
        # to the kernel and L2 to the layer's output activity.
        self.fc = tf.keras.layers.Dense(
            units=NUM_CLASSES,
            activation=tf.keras.activations.softmax,
            kernel_regularizer=tf.keras.regularizers.l1(L1_REGULIZER),
            activity_regularizer=tf.keras.regularizers.l2(L2_REGULIZER))

    def call(self, inputs, training=None, mask=None):
        """Return class probabilities for ``inputs``.

        ``training`` is forwarded to every trunk stage and to the dropout
        layer; ``mask`` is accepted but not used.
        """
        trunk = (self.stem,
                 self.inception_resnet_a,
                 self.reduction_a,
                 self.inception_resnet_b,
                 self.reduction_b,
                 self.inception_resnet_c)
        features = inputs
        for stage in trunk:
            features = stage(features, training=training)

        pooled = self.avgpool(features)
        pooled = self.dropout(pooled, training=training)
        flattened = self.flat(pooled)

        return self.fc(flattened)

Гиперпараметры

lr = 0.0001
weight decay = 1e-4
l1 reg = 0.01
l2 reg = 0.01
drop-out rate = 0.2

Сценарий оценки

import tensorflow as tf
import sys
import datetime
import os

from configuration import save_model_root_dir, log_root_dir
from prepare_data import generate_datasets
from train import get_model, process_features

def folder_preparation(job_id, product_id):
    """Create the log and model folders for a job and (re)write the test log headers.

    Both test log files are opened in "w" mode, so any previous content for
    the same job/product is discarded.

    Args:
        job_id: Job identifier (string), used as the first path component.
        product_id: Product identifier (string), used as the second path
            component.

    Returns:
        ``(log_dir, save_model_dir)``: directory paths built from
        ``log_root_dir`` / ``save_model_root_dir``; both end with "/" so a
        file name can be appended directly.
    """
    # Generate the log file path and pre-create the log files with their headers.
    log_dir = log_root_dir + job_id + "/" + product_id + "/"
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    step_columns = ("type", "timestamp", "batch",
                    "test_accuracy", "predict_labels", "actual_labels")
    # ``with`` guarantees the handle is closed even if a write fails.
    with open(log_dir + "test_result_step" + ".log", "w") as step_log:
        step_log.write("\t".join(step_columns) + "\n")

    summary_columns = ("timestamp", "test accuracy")
    with open(log_dir + "test_result" + ".log", "w") as summary_log:
        summary_log.write("\t".join(summary_columns) + "\n")

    # Generate the folder the model weights are saved into.
    save_model_dir = save_model_root_dir + job_id + "/" + product_id + "/"
    if not os.path.exists(save_model_dir):
        os.makedirs(save_model_dir)

    return log_dir, save_model_dir

def main(argv):
    """Load the saved weights, evaluate them on the test set and log the results.

    Args:
        argv: Command-line arguments in ``sys.argv`` layout, i.e.
            ``[script_name, job_id, product_id]``. ``job_id`` must consist of
            digits only and ``product_id`` must be alphanumeric.

    Returns:
        None. When the arguments are malformed an error line is printed and
        the function returns without evaluating.
    """
    # The frontend must pass exactly job_id and product_id.
    if len(argv) != 3:
        print("ERROR: Format error, refer to the usage: python test.py job_id product_id")
        return
    if not argv[1].isdigit():
        print("ERROR: Format error, job_id must be in int format")
        return
    if not argv[2].isalnum():
        print("ERROR: Format error, product_id must be consistent by character or number, without special character")
        return

    print("INFO: Start evaluating model " + datetime.datetime.now().strftime("%Y%m%d%H%M%S"))

    # GPU settings: enable memory growth on every visible GPU.
    for gpu in tf.config.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(gpu, True)

    # Folder generation for the log files and the saved model.
    log_dir, save_model_dir = folder_preparation(argv[1], argv[2])
    step_log_path = log_dir + "test_result_step" + ".log"
    summary_log_path = log_dir + "test_result" + ".log"

    # Get the datasets; only the test split is used below.
    train_dataset, valid_dataset, test_dataset, train_count, valid_count, test_count = generate_datasets()

    # Load the model.
    # NOTE(review): the checkpoint is read from a fixed relative path rather
    # than from save_model_dir; the weights have to be copied there by hand.
    model = get_model()
    model.load_weights(filepath="saved_model/model")  # Already copied to the folder

    # Use the stateless loss class, as in training. The metrics class of the
    # same name keeps a running mean, which would then be averaged a second
    # time by ``test_loss``.
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    test_loss = tf.keras.metrics.Mean()
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

    # Not wrapped in @tf.function: the step returns NumPy values via .numpy().
    def test_step(images, labels):
        """Score one test batch in inference mode; return the predicted labels."""
        predictions = model(images, training=False)
        t_loss = loss_object(labels, predictions)
        test_loss(t_loss)
        test_accuracy(labels, predictions)

        return tf.math.argmax(predictions, axis=1).numpy()

    for batch, features in enumerate(test_dataset, start=1):
        test_images, test_labels = process_features(features, data_augmentation=False)
        predict_labels = test_step(test_images, test_labels)
        running_accuracy = test_accuracy.result().numpy()
        print("loss: {:.5f}, test accuracy: {:.5f}, predict_labels:{}, test_labels:{}".format(test_loss.result().numpy(),
                                                                                              running_accuracy,
                                                                                              predict_labels,
                                                                                              test_labels))

        # Record the batch in the tab-separated step log.
        step_row = [
            "test",
            datetime.datetime.now().strftime("%Y%m%d%H%M%S"),
            str(batch),
            str(running_accuracy),
            str(predict_labels),
            str(test_labels),
        ]
        with open(step_log_path, "a") as step_log:
            step_log.write("\t".join(step_row) + "\n")

    final_accuracy = test_accuracy.result().numpy()
    print("The accuracy on test set is: {:.3f}%".format(final_accuracy * 100))

    # Write the plain number (not the tensor repr) so the file stays parseable
    # and matches the format of the per-step log.
    summary_row = [
        datetime.datetime.now().strftime("%Y%m%d%H%M%S"),
        str(final_accuracy),
    ]
    with open(summary_log_path, "a") as summary_log:
        summary_log.write("\t".join(summary_row) + "\n")


# Script entry point: forward the command-line arguments to main() only when
# this file is executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main(sys.argv)
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...