This code builds neural networks that are meant to correctly classify the digits of the MNIST dataset.
The code creates a population of 10 neural networks. They are scored with the categorical cross-entropy loss function. The 5 best networks are kept for the next generation, and the other 5 are replaced by "child" networks bred as combinations of the 5 that were kept. This is done with a technique called a genetic algorithm.
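In pseudocode, one generation is meant to work like this (just a sketch of the idea, not the actual code; evaluate, crossover and mutate stand in for the functions defined in the modules below):

population.sort(key=evaluate)                   # lower cross-entropy loss is better
parents = population[:5]                        # elitism: keep the 5 best networks
children = []
while len(children) < 5:
    mother, father = random.sample(parents, 2)  # two distinct parents
    children.append(mutate(crossover(mother, father)))
population = parents + children                 # next generation, 10 networks again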
The problem: since we keep the best networks at every generation, the loss of the 5 retained networks should only decrease or stay the same from one generation to the next. That is not what I observe: the loss goes up and down between generations, even for the very same networks.
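To make this concrete, here is the kind of check I would expect to pass with pure elitism (a sketch built on top of generate() from Module 3; network.accuracy holds the cost value despite its name):

best_history = []
for i in range(generations - 1):
    train_networks(networks, library)
    best_history.append(min(network.accuracy for network in networks))
    networks = optimizer.evolve(networks)
# Expected: monotonically non-increasing; observed: it goes up and down.
assert all(earlier >= later for earlier, later in zip(best_history, best_history[1:]))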
I suspect the problem is either in the code or in the math, but I cannot find it. I noticed that removing the bias parameters (b1 and b2) from the loss-function equation seems to make it behave more as expected, but again I cannot understand why, unless something is wrong in the implementation of the biases or in their mutation (the mutate_biases() function).
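In case it helps with debugging, one way to check whether a child returned by breed() owns copies of its parents' bias arrays or only references to them (a sketch; mother and father are any two retained Optimizer objects):

import numpy as np
# If either line prints True, the child's biases alias a parent's arrays,
# and mutate_biases() would then also modify the retained parent in place.
child = optimizer.breed(-1, mother, father)
print(np.shares_memory(child.b1, mother.b1) or np.shares_memory(child.b1, father.b1))
print(np.shares_memory(child.b2, mother.b2) or np.shares_memory(child.b2, father.b2))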
Thanks for your help.
Here are the three code modules:
Module 1:
from sklearn.datasets import fetch_openml
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

def get_mnist():
    """Get the MNIST dataset through scikit-learn and pre-process the data to make it usable by our classifiers"""
    # fetch_mldata was removed from scikit-learn; fetch_openml is its replacement
    mnist = fetch_openml('mnist_784', version=1, as_frame=False)
    # X as images - array (70000, 784), respectively the number of examples and the number of pixels per image
    # y as labels - array (70000,), one entry per example, each value being a digit from 0 to 9
    X, y = mnist["data"], mnist["target"]
    # Normalize the pixels of the images
    X = X / 255
    digits = 10
    examples = y.shape[0]
    # Reshape y as an array of shape (1, 70000)
    y = y.reshape(1, examples)
    """Create a label array of shape (10, 70000), replacing each digit value from 0 to 9 by a value of 1,
    the rest of the array being composed of zeros.
    This lays the labels out the same way as our networks' output array, with the maximum value marking what the digit is."""
    Y_new = np.eye(digits)[y.astype('int32')]
    Y_new = Y_new.T.reshape(digits, examples)
    m = 60000
    m_test = X.shape[0] - m
    # Images for the train set and test set, transposed into arrays of shape (784, 60000) and (784, 10000)
    X_train, X_test = X[:m].T, X[m:].T
    # Labels for the train set and test set
    Y_train, Y_test = Y_new[:, :m], Y_new[:, m:]
    # Shuffle the train set to randomize it, as it is ordered from digit 0 to 9
    shuffle_index = np.random.permutation(m)
    X_train, Y_train = X_train[:, shuffle_index], Y_train[:, shuffle_index]
    return X_train, Y_train, X_test, Y_test
def sigmoid(z):
    # Sigmoid activation function
    s = 1 / (1 + np.exp(-z))
    return s
def compute_multiclass_loss(Y, Y_hat):
    """Fitness function: categorical cross-entropy cost function, used in the case of multi-class outputs."""
    # The 10**(-15) term keeps log() away from zero for numerical stability
    L_sum = np.sum(np.multiply(Y, np.log(10**(-15) + Y_hat)))
    m = Y.shape[1]
    L = -(1/m) * L_sum
    return L
def neural_network_evaluator(library, input_layer_to_hidden_layer, hidden_layer_to_output_layer, b1, b2):
    """Function used to: 1/ forward-propagate the input through a particular neural network
    2/ generate the outputs
    3/ determine the cost of the fitness function for this network"""
    X_train, Y_train, X_test, Y_test = library[0], library[1], library[2], library[3]
    # Feedforward of the training set through the neural network
    Z1 = np.matmul(input_layer_to_hidden_layer, X_train) + b1
    A1 = sigmoid(Z1)
    Z2 = np.matmul(hidden_layer_to_output_layer, A1) + b2
    A2 = np.exp(Z2) / np.sum(np.exp(Z2), axis=0)  # softmax over the output layer
    cost = compute_multiclass_loss(Y_train, A2)
    # For testing purposes on the test set
    """Z1_testset = np.matmul(input_layer_to_hidden_layer, X_test) + b1
    A1_testset = sigmoid(Z1_testset)
    Z2_testset = np.matmul(hidden_layer_to_output_layer, A1_testset) + b2
    A2_testset = np.exp(Z2_testset) / np.sum(np.exp(Z2_testset), axis=0)"""
    """predictions = np.argmax(A2, axis=0)  # Change if used for the test set
    labels = np.argmax(Y_train, axis=0)  # Change if used for the test set"""
    """print("Confusion matrix is:")
    print(confusion_matrix(labels, predictions))  # To display the confusion_matrix for each neural network
    print(classification_report(labels, predictions))  # To display the classification_report for each neural network"""
    return cost
Module 2:
from functools import reduce
from operator import add
import numpy as np
import random
import logging
from train_Neuroevolution_ameliored import neural_network_evaluator
from train_Neuroevolution_ameliored import get_mnist
class Optimizer():
    """Class that implements the genetic algorithm for MLP optimization:
    the evolving process, with its cross-over and mutation steps.
    Also used as a neural network creator class triggered AFTER the evolution process, for multiple purposes:
    creating neural network objects,
    creating populations both pre and post evolution,
    computing the average fitness of populations,
    compiling cost values,
    ..."""

    def __init__(self, number, retain=0.5, random_select=0.0, mutation_rate=0.5):
        """Create an optimizer.
        Args:
            retain (float): Fraction of the population to retain after
                each generation
            random_select (float): Probability of a rejected network
                remaining in the population
            mutation_rate (float): Probability that a network will be
                randomly mutated
            ...
        Also initialize our network parameters, for the network population after the first evolution.
        """
        n_x = 784
        n_h = 64
        self.name = "network number %d" % number
        self.mutation_rate = mutation_rate
        self.random_select = random_select
        self.retain = retain
        self.accuracy = 0.
        self.b1 = np.zeros((n_h, 1))
        self.b1_lines = 64
        self.b2 = np.zeros((10, 1))
        self.b2_lines = 10
        self.input_layer_to_hidden_layer = np.random.randn(n_h, n_x)
        self.input_layer_to_hidden_layer_shape_lines = 64
        self.input_layer_to_hidden_layer_shape_columns = 784
        self.hidden_layer_to_output_layer = np.random.randn(10, n_h)
        self.hidden_layer_to_output_layer_shape_lines = 10
        self.hidden_layer_to_output_layer_shape_columns = 64
        self.network = [[self.input_layer_to_hidden_layer], [self.hidden_layer_to_output_layer], [self.b1], [self.b2]]
    def fitness(self, network):
        """Return network.accuracy, which is our fitness function value after the first evolution."""
        return network.accuracy
    def breed(self, number, mother, father):
        """Make one child out of two parent networks.
        Args:
            mother (list): Optimizer() object parameters
            father (list): Optimizer() object parameters
        Returns:
            (list): One network object as an Optimizer() object
        """
        child = [0, 0, 0, 0]
        # Loop through the parameters and pick params for the kid.
        child[0] = random.choice([mother.input_layer_to_hidden_layer, father.input_layer_to_hidden_layer])
        child[1] = random.choice([mother.hidden_layer_to_output_layer, father.hidden_layer_to_output_layer])
        child[2] = random.choice([mother.b1, father.b1])
        child[3] = random.choice([mother.b2, father.b2])
        # Create a network object and assign the child[list] values to it
        network = Optimizer(number)
        network.input_layer_to_hidden_layer = child[0]
        network.hidden_layer_to_output_layer = child[1]
        network.b1 = child[2]
        network.b2 = child[3]
        network.network = [child[0], child[1], child[2], child[3]]
        # Mutate
        if network.mutation_rate > random.random():
            network.mutate()
            network.mutate_biases()
        return network
    def mutate(self):
        """Two ways of operating mutation on the weights.
        Either mutate every single weight, multiplying each weight by a random number,
        or mutate an arbitrary random number of weights (e.g., from 1 to 100), multiplying each mutated weight by a random number.
        The second technique does not seem to work, for an undetermined reason."""
        # First technique
        mutation_weights_2nd_layer = np.random.random((self.input_layer_to_hidden_layer_shape_lines, self.input_layer_to_hidden_layer_shape_columns))
        mutation_weights_final_layer = np.random.random((self.hidden_layer_to_output_layer_shape_lines, self.hidden_layer_to_output_layer_shape_columns))
        self.input_layer_to_hidden_layer = mutation_weights_2nd_layer * self.input_layer_to_hidden_layer
        self.hidden_layer_to_output_layer = mutation_weights_final_layer * self.hidden_layer_to_output_layer
    def mutate_biases(self):
        """Mutate the biases using the same approach as the second technique used for the weights."""
        random_number_mutated_biases = np.random.randint(low=1, high=100)
        list_random_indices_lines1 = np.random.randint(low=0, high=self.b1_lines, size=(random_number_mutated_biases))
        list_random_indices_lines2 = np.random.randint(low=0, high=self.b2_lines, size=(random_number_mutated_biases))
        d = 0
        for _ in range(random_number_mutated_biases):
            i = np.random.uniform(low=-1, high=+1.1)  # random number (arbitrary range) added to each mutated bias
            # Select a particular bias by its indices and modify it by adding i
            self.b1[list_random_indices_lines1[d]][0] = self.b1[list_random_indices_lines1[d]][0] + i
            self.b2[list_random_indices_lines2[d]][0] = self.b2[list_random_indices_lines2[d]][0] + i
            d += 1
    def evolve(self, pop):
        """Evolve a population of networks.
        Args:
            pop (list): A list of network parameters
        Returns:
            (list): The evolved population of networks
        """
        # Get scores for each network.
        for network in pop:
            print("Name is:", network.name)
        number = 1
        for network in pop:
            network.name = "network number %d" % number
            number += 1
        graded = [(network.fitness(network), network) for network in pop]
        for network in pop:
            print("accuracy before =", network.fitness(network))
            print("Name is:", network.name)
        # Sort on the scores, ascending, since a lower cost is better.
        graded = [x[1] for x in sorted(graded, key=lambda x: x[0], reverse=False)]
        # Get the number we want to keep for the next gen.
        retain_length = int(len(graded) * self.retain)
        # The parents are every network we want to keep.
        parents = graded[:retain_length]
        # For those we aren't keeping, randomly keep some anyway.
        for individual in graded[retain_length:]:
            if self.random_select > random.random():
                parents.append(individual)
        # Now find out how many spots we have left to fill.
        parents_length = len(parents)
        desired_length = len(pop) - parents_length
        children = []
        number = -1
        # Add children, which are bred from two remaining networks.
        while len(children) < desired_length:
            # Get a random mom and dad.
            male = random.randint(0, parents_length - 1)
            female = random.randint(0, parents_length - 1)
            # Assuming they aren't the same network...
            if male != female:
                male = parents[male]
                female = parents[female]
                # Breed them.
                baby = self.breed(number, male, female)
                # Add the children one at a time.
                if len(children) < desired_length:
                    children.append(baby)
                    number -= 1
        parents.extend(children)
        # Count how many networks from the old population survive into the new one.
        total_nbr_values = 0
        for i in pop:
            for j in parents:
                if i == j:
                    total_nbr_values += 1
                    print("same value")
        print("total =", total_nbr_values)
        for network in parents:
            print("accuracy after =", network.fitness(network))
            print("Name:", network.name)
        return parents
    def create_population(self, count):
        """Create a population of random networks.
        Args:
            count (int): Number of networks to generate, aka the
                size of the population
        Returns:
            (list): Population of network objects
        """
        pop = []
        number = 1
        for _ in range(0, count):
            # Create a random network.
            network = Optimizer(number)
            number += 1
            print("Name is:", network.name)
            # Add the network to our population.
            pop.append(network)
        return pop
    def evaluate_neural_network(self, library):
        """Get the result of the chosen fitness function and store it as an Optimizer() attribute.
        "accuracy" is just a name and does not necessarily mean actual accuracy; here it holds the cost value."""
        self.accuracy = neural_network_evaluator(library, self.input_layer_to_hidden_layer,
                                                 self.hidden_layer_to_output_layer, self.b1, self.b2)
        print(self.accuracy)  # Display the network's cost value.
Module 3:
"""Entry point to evolving the neural network. Start here."""
from train_Neuroevolution_ameliored import get_mnist
from optimizer_Neuroevolution_ameliored import Optimizer
from tqdm import tqdm
def train_networks(networks, library):
    """Train (evaluate) each network.
    Args:
        networks (list): Current population of networks
    """
    pbar = tqdm(total=len(networks))
    for network in networks:
        network.evaluate_neural_network(library)
        print("Name:", network.name)
        pbar.update(1)
    pbar.close()
def get_average_accuracy(networks):
    """Get the average cost value for a group of networks.
    Args:
        networks (list): List of networks
    Returns:
        float: The average cost value of a population of networks.
    """
    total_accuracy = 0
    for network in networks:
        total_accuracy += network.accuracy
    return total_accuracy / len(networks)
def generate(generations, population, dataset):
    """Generate a network with the genetic algorithm.
    Args:
        generations (int): Number of times to evolve the population
        population (int): Number of networks in each generation
        dataset (str): Dataset to use for training/evaluating
    """
    # Create an initial population of random networks
    library = [0, 0, 0, 0]
    library[0], library[1], library[2], library[3] = get_mnist()
    number = 1
    optimizer = Optimizer(number)
    networks = optimizer.create_population(population)
    # Evolve, except on the last iteration.
    for i in range(generations - 1):
        train_networks(networks, library)
        print("generation", i + 1)
        networks = optimizer.evolve(networks)
def main():
    """Evolve a network."""
    generations = 30  # Number of times to evolve the population.
    population = 10  # Number of networks in each generation.
    dataset = 'mnist'  # Dataset
    generate(generations, population, dataset)

if __name__ == '__main__':
    main()