Реализация Tensorflow VGG16 не учится на cifar-10 - PullRequest
0 голосов
/ 21 февраля 2019

Эта VGGNet была реализована с использованием инфраструктуры Tensorflow с нуля, где все слои определены в коде.Основная проблема, с которой я здесь сталкиваюсь, заключается в том, что точность обучения, не говоря уже о точности валидации, повышается, хотя я и жду ее довольно долго.Я подозреваю, что есть несколько проблем, вызывающих эту проблему прямо сейчас.Во-первых, я думаю, что сеть слишком глубокая и широкая для набора данных cifar-10.Во-вторых, извлечение пакета данных из всего набора данных не является исчерпывающим, то есть выбор пакета используется снова и снова по всему набору данных без исключения тех примеров, которые были выбраны в текущей эпохе.

Однако все же я мог быне заставить этот код работать после многих часов и дней экспериментов.

Я бы хотел извлечь раздел с проблемным кодом, чтобы задать вопрос, но так как я не могу точно указать здесь точный раздел, позвольте мне загрузить весь мой код.

import os
import sys
import tensorflow as tf
import numpy as np
import scipy as sci
import math
import matplotlib.pyplot as plt
import time
import random
import imageio
import pickle
import cv2
import json
from pycocotools.coco import COCO


class SVGG:
    def __init__(self, num_output_classes):
        self.input_layer_size = 0
        self.num_output_classes = num_output_classes

        # Data
        self.X = []
        self.Y = []

        self.working_x = []
        self.working_y = []

        self.testX = []
        self.testY = []

        # hard coded for now. Have to change.
        self.input_data_size = 32 # 32 X 32
        self.input_data_size_flat = 3072 # 32 X 32 X 3 == 3072
        self.num_of_channels = 3 # 3 for colour image

        self.input_data_size = 32  # 32 X 32
        self.input_data_size_flat = self.input_data_size * self.input_data_size  # 32 X 32 X 3 == 3072
        self.num_of_channels = 3  # 3 for colour image

        self.convolution_layers = []
        self.convolution_weights = []
        self.fully_connected_layers = []
        self.fully_connected_weights = []

    def feed_examples(self, input_X, input_Y):
        """
        Feed examples to be learned
        :param input_X: Training dataset X
        :param input_Y: Traning dataset label
        :return:
        """

        # Take first input and calculate its size
        # hard code size
        self.X = input_X
        self.Y = input_Y
        self.input_data_size_flat = len(self.X[0]) * len(self.X[0][0]) * len(self.X[0][0][0])

    def feed_test_data(self, test_X, test_Y):
        self.testX = test_X
        self.testY = test_Y

    def run(self):
        x = tf.placeholder(tf.float32, [None, self.input_data_size_flat], name='x')
        x_data = tf.reshape(x, [-1, self.input_data_size, self.input_data_size, 3])

        y_true = tf.placeholder(tf.float32, [None, self.num_output_classes], name='y_true')
        y_true_cls = tf.argmax(y_true, axis=1)


        """
        VGG layers
        """


        # Create layers
        ######################################## Input Layer ########################################
        input_layer, input_weight = self.create_convolution_layer(x_data, num_input_channels=3, filter_size=3, num_filters=64,
                                                        use_pooling=True) # False

        ######################################## Convolutional Layer ########################################
        ############### Conv Layer 1 #################
        conv_1_1, w_1_1 = self.create_convolution_layer(input=input_layer, num_input_channels=64, filter_size=3, num_filters=64, use_pooling=False)
        conv_1_2, w_1_2 = self.create_convolution_layer(input=conv_1_1, num_input_channels=64, filter_size=3, num_filters=128, use_pooling=True)

        ############### Conv Layer 2 #################
        conv_2_1, w_2_1 = self.create_convolution_layer(input=conv_1_2, num_input_channels=128, filter_size=3, num_filters=128, use_pooling=False)
        conv_2_2, w_2_2 = self.create_convolution_layer(input=conv_2_1, num_input_channels=128, filter_size=3, num_filters=256, use_pooling=True)

        ############### Conv Layer 3 #################
        conv_3_1, w_3_1 = self.create_convolution_layer(input=conv_2_2, num_input_channels=256, filter_size=3, num_filters=256, use_pooling=False)
        conv_3_2, w_3_2 = self.create_convolution_layer(input=conv_3_1, num_input_channels=256, filter_size=3, num_filters=256, use_pooling=False)
        conv_3_3, w_3_3 = self.create_convolution_layer(input=conv_3_2, num_input_channels=256, filter_size=3, num_filters=512, use_pooling=True)

        ############### Conv Layer 4 #################
        conv_4_1, w_4_1 = self.create_convolution_layer(input=conv_3_3, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
        conv_4_2, w_4_2 = self.create_convolution_layer(input=conv_4_1, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
        conv_4_3, w_4_3 = self.create_convolution_layer(input=conv_4_2, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=True)

        ############### Conv Layer 5 #################
        conv_5_1, w_5_1 = self.create_convolution_layer(input=conv_4_3, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
        conv_5_2, w_5_2 = self.create_convolution_layer(input=conv_5_1, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=False)
        conv_5_3, w_5_3 = self.create_convolution_layer(input=conv_5_2, num_input_channels=512, filter_size=3, num_filters=512, use_pooling=True)

        layer_flat, num_features = self.flatten_layer(conv_5_3)

        ######################################## Fully Connected Layer ########################################
        fc_1 = self.create_fully_connected_layer(input=layer_flat, num_inputs=num_features, num_outputs=4096)
        fc_2 = self.create_fully_connected_layer(input=fc_1, num_inputs=4096, num_outputs=4096)
        fc_3 = self.create_fully_connected_layer(input=fc_2, num_inputs=4096, num_outputs=self.num_output_classes, use_dropout=False)


        # Normalize prediction
        y_prediction = tf.nn.softmax(fc_3)

        # The class-number is the index of the largest element
        y_prediction_class = tf.argmax(y_prediction, axis=1)

        # Cost-Fuction to be optimized
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=fc_3, labels=y_true)
        # => Now we have a measure of how well the model performs on each image individually. But in order to use the
        # Cross entropy to guide the optimization of the model's variable swe need a single value, so we simply take the
        # Average of the cross-entropy for all the image classifications
        cost = tf.reduce_mean(cross_entropy)

        # Optimizer
        optimizer_adam = tf.train.AdamOptimizer(learning_rate=0.002).minimize(cost)

        # Performance measure
        correct_prediction = tf.equal(y_prediction_class, y_true_cls)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        total_iterations = 0
        num_iterations = 100000

        start_time = time.time()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            for i in range(num_iterations):
                x_batch, y_true_batch, _ = self.get_batch(X=self.X, Y=self.Y, low=0, high=40000, batch_size=128)
                feed_dict_train = {x: x_batch, y_true: y_true_batch}
                sess.run(optimizer_adam, feed_dict_train)

                if i % 100 == 99:
                    # Calculate the accuracy on the training-set.
                    x_batch, y_true_batch, _ = self.get_batch(X=self.X, Y=self.Y, low=40000, high=50000, batch_size=1000)
                    feed_dict_validate = {x: x_batch, y_true: y_true_batch}
                    acc = sess.run(accuracy, feed_dict=feed_dict_validate)
                    # Message for printing.
                    msg = "Optimization Iteration: {0:>6}, Training Accuracy: {1:>6.1%}"
                    # print(sess.run(y_prediction, feed_dict=feed_dict_train))
                    # print(sess.run(y_prediction_class, feed_dict=feed_dict_train))

                    print(msg.format(i + 1, acc))

                if i % 10000 == 9999:
                    oSaver = tf.train.Saver()
                    oSess = sess
                    path = "./model/_" + "iteration_" + str(i) + ".ckpt"
                    oSaver.save(oSess, path)

                if i == num_iterations - 1:
                    x_batch, y_true_batch, _ = self.get_batch(X=self.testX, Y=self.testY, low=0, high=10000, batch_size=10000)
                    feed_dict_test = {x: x_batch, y_true: y_true_batch}
                    test_accuracy = sess.run(accuracy, feed_dict=feed_dict_test)
                    msg = "Test Accuracy: {0:>6.1%}"
                    print(msg.format(test_accuracy))

    def get_batch(self, X, Y, low=0, high=50000, batch_size=128):
        x_batch = []
        y_batch = np.ndarray(shape=(batch_size, self.num_output_classes))
        index = np.random.randint(low=low, high=high, size=batch_size)

        counter = 0
        for idx in index:
            x_batch.append(X[idx].flatten())
            y_batch[counter] = one_hot_encoded(Y[idx], self.num_output_classes)
            y_batch_cls = Y[idx]
            counter += 1

        return x_batch, y_batch, y_batch_cls

    def generate_new_weights(self, shape):
        w = tf.Variable(tf.truncated_normal(shape, stddev=0.05))
        return w

    def generate_new_biases(self, shape):
        b = tf.Variable(tf.constant(0.05, shape=[shape]))
        return b

    def create_convolution_layer(self, input, num_input_channels, filter_size, num_filters, use_pooling):
        """

        :param input: The previous layer
        :param num_input_channels: Number of channels in previous layer
        :param filter_size: W and H of each filter
        :param num_filters: Number of filters
        :return:
        """
        shape = [filter_size, filter_size, num_input_channels, num_filters]
        weights = self.generate_new_weights(shape)
        biases = self.generate_new_biases(num_filters)

        layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
        layer += biases

        # Max Pooling
        if use_pooling:
            layer = tf.nn.max_pool(layer, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')

        # ReLu. Using elu for better performance
        layer = tf.nn.elu(layer)

        return layer, weights

    def create_fully_connected_layer(self, input, num_inputs, num_outputs, use_dropout=True):
        weights = self.generate_new_weights(shape=[num_inputs, num_outputs])
        biases = self.generate_new_biases(shape=num_outputs)

        layer = tf.matmul(input, weights) + biases

        layer = tf.nn.elu(layer)

        if use_dropout:
            keep_prob = tf.placeholder(tf.float32)
            keep_prob = 0.5
            layer = tf.nn.dropout(layer, keep_prob)

        return layer

    def flatten_layer(self, layer):
        """
                Flattens dimension that is output by a convolution layer.
                Flattening is need to feed into a fully-connected-layer.
                :param layer:
                :return:
        """
        # shape [num_images, img_height, img_width, num_channels]
        layer_shape = layer.get_shape()

        # Number of features h x w x channels
        num_features = layer_shape[1: 4].num_elements()

        # Reshape
        layer_flat = tf.reshape(layer, [-1, num_features])

        # Shape is now [num_images, img_height * img_width * num_channels]
        return layer_flat, num_features


def unpickle(file):
    with open(file, 'rb') as file:
        dict = pickle.load(file, encoding='bytes')

    return dict

def convert_to_individual_image(flat):

    img_R = flat[0:1024].reshape((32, 32))
    img_G = flat[1024:2048].reshape((32, 32))
    img_B = flat[2048:3072].reshape((32, 32))

    #B G R
    mean = [125.3, 123.0, 113.9]

    img = np.dstack((img_R - mean[0], img_G - mean[1], img_B - mean[2]))
    img = np.array(img)
    # img = cv2.resize(img, (224, 224), img)

    return img


def read_coco_data(img_path, annotation_path):
    coco = COCO(annotation_path)
    ids = list(coco.imgs.keys())
    ann_keys = list(coco.anns.keys())

    print(coco.imgs[ids[0]])
    print(coco.anns[ann_keys[0]])


def one_hot_encoded(class_numbers, num_classes=None):
    if num_classes is None:
        num_classes = np.max(class_numbers) + 1

    return np.eye(num_classes, dtype=float)[class_numbers]


if __name__ == '__main__':
    data = []
    labels = []

    val_data = []
    val_label = []

    # cifar-10
    counter = 0
    for i in range(1, 6):
        unpacked = unpickle("./cifar10/data_batch_" + str(i))
        tmp_data = unpacked[b'data']
        tmp_label = unpacked[b'labels']

        inner_counter = 0
        for flat in tmp_data:
            converted = convert_to_individual_image(flat)
            data.append(converted)
            labels.append(tmp_label[inner_counter])
            counter += 1
            inner_counter += 1
            cv2.imwrite("./img/" + str(counter) + ".jpg", converted)

    # Test data
    unpacked = unpickle("./cifar10/test_batch")
    test_data = []
    test_data_flat = unpacked[b'data']
    test_label = unpacked[b'labels']

    for flat in test_data_flat:
        test_data.append(convert_to_individual_image(flat))

    svgg = SVGG(10)
    svgg.feed_examples(input_X=data, input_Y=labels)
    svgg.feed_test_data(test_X=test_data, test_Y=test_label)
    svgg.run()
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...