This code builds neural networks that are meant to correctly classify the digits of the MNIST dataset.
The code creates a population of 10 neural networks. They are scored with the categorical cross-entropy loss function. The 5 best networks are kept for the next generation, and the other 5 are replaced by "child" networks bred as combinations of the 5 that were kept. This is done with a technique called a genetic algorithm.
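In pseudocode, one generation is meant to work like this (just a sketch of the idea, not the actual code; evaluate, crossover and mutate stand in for the functions defined in the modules below):

population.sort(key=evaluate)                   # lower cross-entropy loss is better
parents = population[:5]                        # elitism: keep the 5 best networks
children = []
while len(children) < 5:
    mother, father = random.sample(parents, 2)  # two distinct parents
    children.append(mutate(crossover(mother, father)))
population = parents + children                 # next generation, 10 networks again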
The problem: since we keep the best networks at every generation, the loss of the 5 retained networks should only decrease or stay the same from one generation to the next. That is not what I observe: the loss goes up and down between generations, even for the very same networks.
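To make this concrete, here is the kind of check I would expect to pass with pure elitism (a sketch built on top of generate() from Module 3; network.accuracy holds the cost value despite its name):

best_history = []
for i in range(generations - 1):
    train_networks(networks, library)
    best_history.append(min(network.accuracy for network in networks))
    networks = optimizer.evolve(networks)
# Expected: monotonically non-increasing; observed: it goes up and down.
assert all(earlier >= later for earlier, later in zip(best_history, best_history[1:]))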
I suspect the problem is either in the code or in the math, but I cannot find it. I noticed that removing the bias parameters (b1 and b2) from the loss-function equation seems to make it behave more as expected, but again I cannot understand why, unless something is wrong in the implementation of the biases or in their mutation (the mutate_biases() function).
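In case it helps with debugging, one way to check whether a child returned by breed() owns copies of its parents' bias arrays or only references to them (a sketch; mother and father are any two retained Optimizer objects):

import numpy as np
# If either line prints True, the child's biases alias a parent's arrays,
# and mutate_biases() would then also modify the retained parent in place.
child = optimizer.breed(-1, mother, father)
print(np.shares_memory(child.b1, mother.b1) or np.shares_memory(child.b1, father.b1))
print(np.shares_memory(child.b2, mother.b2) or np.shares_memory(child.b2, father.b2))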
Thanks for your help.
Here are the three code modules:
Module 1:
from sklearn.datasets import fetch_openml
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

def get_mnist():
    """Get the MNIST dataset through scikit-learn and pre-process the data to make it usable by our classifiers"""
    # fetch_mldata was removed from scikit-learn; fetch_openml is its replacement
    mnist = fetch_openml('mnist_784', version=1, as_frame=False)
    # X as images - array (70000, 784), respectively the number of examples and the number of pixels per image
    # y as labels - array (70000,), one entry per example, each value being a digit from 0 to 9
    X, y = mnist["data"], mnist["target"]
    # Normalize the pixels of the images
    X = X / 255
    digits = 10
    examples = y.shape[0]
    # Reshape y as an array of shape (1, 70000)
    y = y.reshape(1, examples)
    """Create a label array of shape (10, 70000), replacing each digit value from 0 to 9 by a value of 1,
    the rest of the array being composed of zeros.
    This lays the labels out the same way as our networks' output array, with the maximum value marking what the digit is."""
    Y_new = np.eye(digits)[y.astype('int32')]
    Y_new = Y_new.T.reshape(digits, examples)
    m = 60000
    m_test = X.shape[0] - m
    # Images for the train set and test set, transposed into arrays of shape (784, 60000) and (784, 10000)
    X_train, X_test = X[:m].T, X[m:].T
    # Labels for the train set and test set
    Y_train, Y_test = Y_new[:, :m], Y_new[:, m:]
    # Shuffle the train set to randomize it, as it is ordered from digit 0 to 9
    shuffle_index = np.random.permutation(m)
    X_train, Y_train = X_train[:, shuffle_index], Y_train[:, shuffle_index]
    return X_train, Y_train, X_test, Y_test
def sigmoid(z):
    # Sigmoid activation function
    s = 1 / (1 + np.exp(-z))
    return s
def compute_multiclass_loss(Y, Y_hat):
    """Fitness function: categorical cross-entropy cost function, used in the case of multi-class outputs."""
    # The 10**(-15) term keeps log() away from zero for numerical stability
    L_sum = np.sum(np.multiply(Y, np.log(10**(-15) + Y_hat)))
    m = Y.shape[1]
    L = -(1/m) * L_sum
    return L
def neural_network_evaluator(library, input_layer_to_hidden_layer, hidden_layer_to_output_layer, b1, b2):
    """Function used to: 1/ forward-propagate the input through a particular neural network
    2/ generate the outputs
    3/ determine the cost of the fitness function for this network"""
    X_train, Y_train, X_test, Y_test = library[0], library[1], library[2], library[3]
    # Feedforward of the training set through the neural network
    Z1 = np.matmul(input_layer_to_hidden_layer, X_train) + b1
    A1 = sigmoid(Z1)
    Z2 = np.matmul(hidden_layer_to_output_layer, A1) + b2
    A2 = np.exp(Z2) / np.sum(np.exp(Z2), axis=0)  # softmax over the output layer
    cost = compute_multiclass_loss(Y_train, A2)
    # For testing purposes on the test set
    """Z1_testset = np.matmul(input_layer_to_hidden_layer, X_test) + b1
    A1_testset = sigmoid(Z1_testset)
    Z2_testset = np.matmul(hidden_layer_to_output_layer, A1_testset) + b2
    A2_testset = np.exp(Z2_testset) / np.sum(np.exp(Z2_testset), axis=0)"""
    """predictions = np.argmax(A2, axis=0)  # Change if used for the test set
    labels = np.argmax(Y_train, axis=0)  # Change if used for the test set"""
    """print("Confusion matrix is:")
    print(confusion_matrix(labels, predictions))  # To display the confusion_matrix for each neural network
    print(classification_report(labels, predictions))  # To display the classification_report for each neural network"""
    return cost
Module 2:
from functools import reduce
from operator import add
import numpy as np
import random
import logging
from train_Neuroevolution_ameliored import neural_network_evaluator
from train_Neuroevolution_ameliored import get_mnist
class Optimizer():
    """Class that implements the genetic algorithm for MLP optimization:
    the evolving process, with its cross-over and mutation steps.
    Also used as a neural network creator class triggered AFTER the evolution process, for multiple purposes:
    creating neural network objects,
    creating populations both pre and post evolution,
    computing the average fitness of populations,
    compiling cost values,
    ..."""

    def __init__(self, number, retain=0.5, random_select=0.0, mutation_rate=0.5):
        """Create an optimizer.
        Args:
            retain (float): Fraction of the population to retain after
                each generation
            random_select (float): Probability of a rejected network
                remaining in the population
            mutation_rate (float): Probability that a network will be
                randomly mutated
            ...
        Also initialize our network parameters, for the network population after the first evolution.
        """
        n_x = 784
        n_h = 64
        self.name = "network number %d" % number
        self.mutation_rate = mutation_rate
        self.random_select = random_select
        self.retain = retain
        self.accuracy = 0.
        self.b1 = np.zeros((n_h, 1))
        self.b1_lines = 64
        self.b2 = np.zeros((10, 1))
        self.b2_lines = 10
        self.input_layer_to_hidden_layer = np.random.randn(n_h, n_x)
        self.input_layer_to_hidden_layer_shape_lines = 64
        self.input_layer_to_hidden_layer_shape_columns = 784
        self.hidden_layer_to_output_layer = np.random.randn(10, n_h)
        self.hidden_layer_to_output_layer_shape_lines = 10
        self.hidden_layer_to_output_layer_shape_columns = 64
        self.network = [[self.input_layer_to_hidden_layer], [self.hidden_layer_to_output_layer], [self.b1], [self.b2]]
    def fitness(self, network):
        """Return network.accuracy, which is our fitness function value after the first evolution."""
        return network.accuracy
    def breed(self, number, mother, father):
        """Make one child out of two parent networks.
        Args:
            mother (list): Optimizer() object parameters
            father (list): Optimizer() object parameters
        Returns:
            (list): One network object as an Optimizer() object
        """
        child = [0, 0, 0, 0]
        # Loop through the parameters and pick params for the kid.
        child[0] = random.choice([mother.input_layer_to_hidden_layer, father.input_layer_to_hidden_layer])
        child[1] = random.choice([mother.hidden_layer_to_output_layer, father.hidden_layer_to_output_layer])
        child[2] = random.choice([mother.b1, father.b1])
        child[3] = random.choice([mother.b2, father.b2])
        # Create a network object and assign the child[list] values to it
        network = Optimizer(number)
        network.input_layer_to_hidden_layer = child[0]
        network.hidden_layer_to_output_layer = child[1]
        network.b1 = child[2]
        network.b2 = child[3]
        network.network = [child[0], child[1], child[2], child[3]]
        # Mutate
        if network.mutation_rate > random.random():
            network.mutate()
            network.mutate_biases()
        return network
    def mutate(self):
        """Two ways of operating mutation on the weights.
        Either mutate every single weight, multiplying each weight by a random number,
        or mutate an arbitrary random number of weights (e.g., from 1 to 100), multiplying each mutated weight by a random number.
        The second technique does not seem to work, for an undetermined reason."""
        # First technique
        mutation_weights_2nd_layer = np.random.random((self.input_layer_to_hidden_layer_shape_lines, self.input_layer_to_hidden_layer_shape_columns))
        mutation_weights_final_layer = np.random.random((self.hidden_layer_to_output_layer_shape_lines, self.hidden_layer_to_output_layer_shape_columns))
        self.input_layer_to_hidden_layer = mutation_weights_2nd_layer * self.input_layer_to_hidden_layer
        self.hidden_layer_to_output_layer = mutation_weights_final_layer * self.hidden_layer_to_output_layer
    def mutate_biases(self):
        """Mutate the biases using the same approach as the second technique used for the weights."""
        random_number_mutated_biases = np.random.randint(low=1, high=100)
        list_random_indices_lines1 = np.random.randint(low=0, high=self.b1_lines, size=(random_number_mutated_biases))
        list_random_indices_lines2 = np.random.randint(low=0, high=self.b2_lines, size=(random_number_mutated_biases))
        d = 0
        for _ in range(random_number_mutated_biases):
            i = np.random.uniform(low=-1, high=+1.1)  # random number (arbitrary range) added to each mutated bias
            # Select a particular bias by its indices and modify it by adding i
            self.b1[list_random_indices_lines1[d]][0] = self.b1[list_random_indices_lines1[d]][0] + i
            self.b2[list_random_indices_lines2[d]][0] = self.b2[list_random_indices_lines2[d]][0] + i
            d += 1
    def evolve(self, pop):
        """Evolve a population of networks.
        Args:
            pop (list): A list of network parameters
        Returns:
            (list): The evolved population of networks
        """
        # Get scores for each network.
        for network in pop:
            print("Name is:", network.name)
        number = 1
        for network in pop:
            network.name = "network number %d" % number
            number += 1
        graded = [(network.fitness(network), network) for network in pop]
        for network in pop:
            print("accuracy before =", network.fitness(network))
            print("Name is:", network.name)
        # Sort on the scores, ascending, since a lower cost is better.
        graded = [x[1] for x in sorted(graded, key=lambda x: x[0], reverse=False)]
        # Get the number we want to keep for the next gen.
        retain_length = int(len(graded) * self.retain)
        # The parents are every network we want to keep.
        parents = graded[:retain_length]
        # For those we aren't keeping, randomly keep some anyway.
        for individual in graded[retain_length:]:
            if self.random_select > random.random():
                parents.append(individual)
        # Now find out how many spots we have left to fill.
        parents_length = len(parents)
        desired_length = len(pop) - parents_length
        children = []
        number = -1
        # Add children, which are bred from two remaining networks.
        while len(children) < desired_length:
            # Get a random mom and dad.
            male = random.randint(0, parents_length - 1)
            female = random.randint(0, parents_length - 1)
            # Assuming they aren't the same network...
            if male != female:
                male = parents[male]
                female = parents[female]
                # Breed them.
                baby = self.breed(number, male, female)
                # Add the children one at a time.
                if len(children) < desired_length:
                    children.append(baby)
                    number -= 1
        parents.extend(children)
        # Count how many networks from the old population survive into the new one.
        total_nbr_values = 0
        for i in pop:
            for j in parents:
                if i == j:
                    total_nbr_values += 1
                    print("same value")
        print("total =", total_nbr_values)
        for network in parents:
            print("accuracy after =", network.fitness(network))
            print("Name:", network.name)
        return parents
    def create_population(self, count):
        """Create a population of random networks.
        Args:
            count (int): Number of networks to generate, aka the
                size of the population
        Returns:
            (list): Population of network objects
        """
        pop = []
        number = 1
        for _ in range(0, count):
            # Create a random network.
            network = Optimizer(number)
            number += 1
            print("Name is:", network.name)
            # Add the network to our population.
            pop.append(network)
        return pop
    def evaluate_neural_network(self, library):
        """Get the result of the chosen fitness function and store it as an Optimizer() attribute.
        "accuracy" is just a name and does not necessarily mean actual accuracy; here it holds the cost value."""
        self.accuracy = neural_network_evaluator(library, self.input_layer_to_hidden_layer,
                                                 self.hidden_layer_to_output_layer, self.b1, self.b2)
        print(self.accuracy)  # Display the network's cost value.
Module 3:
"""Entry point to evolving the neural network. Start here."""
from train_Neuroevolution_ameliored import get_mnist
from optimizer_Neuroevolution_ameliored import Optimizer
from tqdm import tqdm
def train_networks(networks, library):
    """Train (evaluate) each network.
    Args:
        networks (list): Current population of networks
    """
    pbar = tqdm(total=len(networks))
    for network in networks:
        network.evaluate_neural_network(library)
        print("Name:", network.name)
        pbar.update(1)
    pbar.close()
def get_average_accuracy(networks):
    """Get the average cost value for a group of networks.
    Args:
        networks (list): List of networks
    Returns:
        float: The average cost value of a population of networks.
    """
    total_accuracy = 0
    for network in networks:
        total_accuracy += network.accuracy
    return total_accuracy / len(networks)
def generate(generations, population, dataset):
    """Generate a network with the genetic algorithm.
    Args:
        generations (int): Number of times to evolve the population
        population (int): Number of networks in each generation
        dataset (str): Dataset to use for training/evaluating
    """
    # Create an initial population of random networks
    library = [0, 0, 0, 0]
    library[0], library[1], library[2], library[3] = get_mnist()
    number = 1
    optimizer = Optimizer(number)
    networks = optimizer.create_population(population)
    # Evolve, except on the last iteration.
    for i in range(generations - 1):
        train_networks(networks, library)
        print("generation", i + 1)
        networks = optimizer.evolve(networks)
def main():
    """Evolve a network."""
    generations = 30  # Number of times to evolve the population.
    population = 10  # Number of networks in each generation.
    dataset = 'mnist'  # Dataset
    generate(generations, population, dataset)

if __name__ == '__main__':
    main()