Моя нейронная сеть сходится до точки, где она останавливается - PullRequest
0 голосов
/ 02 февраля 2020

Я писал нейронную сеть для своего проекта A-Level по распознаванию рукописных цифр и следил за серией YouTube 3Blue1Brown по математике за нейронными сетями. Я сам написал весь код, но использую теорию, которую объясняет 3Blue1Brown.

Я протестировал нейронную сеть на простом наборе данных, который закомментирован в нижней части файла нейронной сети, и он работает для этого набора данных. Однако, когда я тестирую его на наборе данных MNIST, он снижает стоимость до определенной суммы, ~ 0,35. Несколько прогонов достигли точности 80%, но большинство попыток достигают только 60%. Скорость обучения составляет 0,1, и я добавил импульс после прочтения некоторых ответов о переполнении стека.

Ниже приведен код для нейронной сети:

import matplotlib.pyplot as plt
import utilities as util
import random


class Node:

    def __init__(self, inputNodes, outputNodes, output, bias, number):
        # These will be a lists of tuples, the first item being the node the second being the weight
        self.inputNodes = inputNodes
        self.outputNodes = outputNodes
        # These are the variables used in propagation. I am using the sigmoid function as an activation function.
        self.bias = bias
        self.activationFunction = util.sigmoid
        self.output = output
        self.error = 0
        # Momentum variable used to make sure the node continues on its trend from preivous iterations.
        self.momentum = 0
        self.prevOutput = 0
        # This is an id that stores position in the nodes array
        self.number = number

    def feedForward(self):
        # This just sums the input nodes outputs and then passes that values through an activation function
        self.prevOutput = self.output
        self.output = self.activationFunction(self.sumInputs() + self.bias)

    def sumInputs(self):
        # Loops through all the inputs and sums the weights and activation
        total = 0
        for i in self.inputNodes:
            total += i[1] * i[0].output
        return total


class NeuralNetwork:

    def __init__(self, nodeNumbers, loadFromFile, filePath):
        # This uses the node numbers as an input to generate a network with randomised weights and biases
        self.nodes = [[] for _ in nodeNumbers]
        self.layers = len(nodeNumbers)
        self.generateNetwork(nodeNumbers)
        # This uses the next two parameters to check whether the network is being loaded from a file and if
        # not overwrites to the file specified
        self.path = filePath
        if loadFromFile:
           self.loadFromFile(self.path)
        # This initialises an empty array that can be used to plot data with matplot
        self.costArray = []

    # generates a connected network based on an array of numbers e.g. [784, 16, 16, 10]
    def generateNetwork(self, nodeNumbers):
        # generates an array of nodes that is of the same size specified
        index = -1
        for i in nodeNumbers:
            index += 1
            for x in range(i):
                # initialise nodes with None type inputs and outputs as well as biases and outputs
                self.nodes[index].append(Node([None], [None], random.uniform(0, 1), random.uniform(-10, 10), x))

        # This loops through the generated array and sets the input nodes
        for i in range(self.layers):
            layer = self.nodes[i]
            # To skip the first layer which are the inputs and so don't have input nodes
            if i == 0:
                continue
            # sets inputNodes
            for tgtNode in layer:
                # initialise inputNodes as a None array with length of previous layer
                tgtNode.inputNodes = [None] * len(self.nodes[i - 1])
                # loops through previous layer and sets weight and nodes
                for x in range(len(self.nodes[i - 1])):
                    tgtNode.inputNodes[x] = [self.nodes[i - 1][x], random.uniform(-0.5, 0.5)]

        # This loops through the generated list and sets the output nodes
        for i in range(self.layers):
            layer = self.nodes[i]
            # To skip the last layer which are the output nodes
            if i == self.layers - 1:
                break
            # set outputNodes
            for tgtNode in layer:
                # initialise outputNodes as a None array with length of previous layer
                tgtNode.outputNodes = [None] * len(self.nodes[i + 1])
                # loops through next layer and sets weights and nodes
                for x in range(len(self.nodes[i + 1])):
                    tgtNode.outputNodes[x] = [self.nodes[i + 1][x], self.nodes[i + 1][x].inputNodes[tgtNode.number][1]]

    def saveToFile(self, filePath):
        # This initialises a string that we will write to the file
        stringToWrite = ""
        # Loops through the nodes array to add the inputNodes weights to the string
        for i in range(self.layers):
            layer = self.nodes[i]
            # Skips the first layer which has no inputs
            if i == 0:
                continue
            # I am using the # as a separator in between layers
            stringToWrite += "#"
            # Loops through the layer to get all the nodes
            for node in layer:
                # I am using the ~ as a separator in between nodes
                stringToWrite += "~"
                # Loops through inputNodes to get all the weights
                for data in node.inputNodes:
                    # I am using the , as a separator in between weights
                    stringToWrite += ","
                    stringToWrite += str(data[1])

        # This is the separator between biases and weights
        stringToWrite += "|"
        # Loops through the nodes array to get the layers
        for layers in self.nodes:
            # I am using the # as a separator between layers
            stringToWrite += "#"
            # Loops through the layers to get the nodes
            for node in layers:
                # I am using the ~ as a separator between nodes/biases
                stringToWrite += "~"
                # Appends the bias to the string
                stringToWrite += str(node.bias)

        # Opens the file for overwriting
        file = open(filePath, 'w')
        file.write(stringToWrite)
        file.close()

    def loadFromFile(self, filePath):
        # This opens the file and reads the entire thing as one string
        file = open(filePath, 'r')
        weights = file.read()
        file.close()
        # Splits for in between biases and weights
        weights = weights.split("|")
        # This entire thing loops through the new data and splits it for each different data type
        # The while loops remove the extraneous data that comes from splitting strings
        for i in range(len(weights)):
            # This is specifically for weights
            if i == 0:
                # This splits between layers
                weights[i] = weights[i].split("#")
                for x in range(len(weights[i])):
                    # This splits between nodes
                    weights[i][x] = weights[i][x].split("~")
                    for y in range(len(weights[i][x])):
                        # This splits between weights
                        weights[i][x][y] = weights[i][x][y].split(",")
                        while '' in weights[i][x][y]:
                            weights[i][x][y].remove('')
                    while '' in weights[i][x]:
                        weights[i][x].remove('')
                while '' in weights[i]:
                    weights[i].remove('')
            # This is specifically for biases
            if i == 1:
                # This splits between layers
                weights[i] = weights[i].split("#")
                for x in range(len(weights[i])):
                    # This splits between nodes/biases
                    weights[i][x] = weights[i][x].split("~")
                    while '' in weights[i][x]:
                        weights[i][x].remove('')
                while '' in weights[i]:
                    weights[i].remove('')

        # This is the final method of removing extraneous data
        weights = [x for x in weights if x]

        # With the data taken and pruned I can now add it to the network.
        for i in range(len(weights)):
            # This is for the weights
            if i == 0:
                # Loops through the layers to set the inputNodes
                for x in range(len(weights[i])):
                    # I want to skip the first layer as it has no input Nodes
                    if x == 0:
                        continue
                    # More data pruning
                    weights[i][x] = [a for a in weights[i][x] if a]
                    for y in range(len(weights[i][x])):
                        for z in range(len(weights[i][x][y])):
                            # Sets the inputs Nodes at the same time as converting them to floats
                            self.nodes[x][y].inputNodes[z][1] = float(weights[i][x][y][z])
            if i == 1:
                # More data pruning
                weights[i] = [a for a in weights[i] if a]
                for x in range(len(weights[i])):
                    # More data pruning
                    weights[i][x] = [a for a in weights[i][x] if a]
                    for y in range(len(weights[i][x])):
                        # Sets the input Nodes at the same time as converting them to floats
                        self.nodes[x][y].bias = float(weights[i][x][y])

        for i in range(self.layers):
            layer = self.nodes[i]
            # To skip the last layer which are the output nodes
            if i == self.layers - 1:
                break
            # set outputNodes
            for tgtNode in layer:
                for x in range(len(self.nodes[i + 1])):
                    # loops through next layer and sets weights and nodes.
                    tgtNode.outputNodes[x][1] = self.nodes[i + 1][x].inputNodes[tgtNode.number][1]

    def loadInputs(self, inputArray):
        # Loads an input array into the neural network
        for i in range(len(inputArray)):
            self.nodes[0][i].output = inputArray[i]

    def feedForward(self):
        output = []
        # does all the feed forwards for all the nodes
        for i in range(len(self.nodes)):
            # skips first layer which already has their own outputs
            if i == 0:
                continue
            for node in self.nodes[i]:
                node.feedForward()
        # gets the output for all the output nodes
        for i in self.nodes[len(self.nodes) - 1]:
            output.append(i.output)
        return output

    def backPropagateCost(self, answer, trueValue):
        # Get the output layers error values for usage in the back propagation
        lastLayer = util.outputCostDerivative(answer, trueValue)
        for i in range(len(lastLayer)):
            # Sets the output layers errors
            self.nodes[self.layers - 1][i].error = lastLayer[i]
        # loops through the nodes array to propagate backwards for the error of each node
        for i in range(self.layers - 1, -1, -1):
            # Skips the last layer as it already has its error
            if i == self.layers - 1:
                continue
            # loops through the nodes to set error
            for tgtNode in self.nodes[i]:
                cost = 0
                # Sums the error of the node
                for x in tgtNode.outputNodes:
                    # This multiplies previous nodes error with the weight connecting both of the nodes to get the error
                    # of the node, this is because of the chain rule.
                    cost += x[1] * x[0].error
                tgtNode.momentum = tgtNode.error
                tgtNode.error = cost

    def updateWeights(self, learningPace):
        # This loops through the generated list and sets the input nodes
        momentumPace = 0.5
        for i in range(self.layers):
            layer = self.nodes[i]
            # To skip the first layer which is just inputs and are not actually nodes, so don't have input nodes.
            if i == 0:
                continue
            # updates weights
            for tgtNode in layer:
                # loops through previous layer and sets weight and nodes
                for x in tgtNode.inputNodes:
                    # multiplies the nodes error with the connected nodes output to get the weights specific error
                    # This is temporary code to check out momentum
                    x[1] += learningPace * tgtNode.error * x[0].output + momentumPace * tgtNode.momentum * x[0].prevOutput
                    # x[1] += learningPace * tgtNode.error * x[0].output
                # I already have the bias error so I just multiply it by a constant to get change
                tgtNode.bias += learningPace * tgtNode.error
        # This loops through the generated list and sets the output nodes
        for i in range(self.layers):
            layer = self.nodes[i]
            # To skip the last layer which are the output nodes
            if i == self.layers - 1:
                break
            # set outputNodes
            for tgtNode in layer:
                for x in range(len(self.nodes[i + 1])):
                    # loops through next layer and sets weights and nodes.
                    tgtNode.outputNodes[x][1] = self.nodes[i + 1][x].inputNodes[tgtNode.number][1]

    def trainNetwork(self, trainingData, trainingLabels, epochs, learningPace):
        guess = []
        costArray = []
        costMean = 0
        lp = learningPace
        lowestCost = 0.22
        for x in range(epochs):
            costMean = 0
            for i in range(len(trainingData)):
                # gets guess and true values
                # loads the right input array for the feed forward algorithm
                self.loadInputs(trainingData[i])
                # generates a guess using the feed forward algorithm
                guess = self.feedForward()
                # gets the right answers from the array
                trueValue = trainingLabels[i]
                # back propagates error to get the error of each node
                self.backPropagateCost(guess, trueValue)
                # updates the weights and biases using the error of the nodes
                self.updateWeights(lp)
                cost = util.evaluateCost(guess, trueValue)
                costMean += cost

                if i % 500 == 0:
                    if i != 0:
                        costMean = costMean / 500
                    # costArray.append(costMean)
                    print("Epoch:", x)
                    print("Rep:", i)
                    print("Guess:", guess)
                    print("Answer:", trueValue)
                    print("Cost:", costMean)
                    if costMean <= lowestCost and i != 0:
                        lowestCost = costMean
                        lp = lp / 2
                        print("Saving weights ...")
                        self.saveToFile(self.path)
                    costMean = 0
                    print("-----------------------------------")
                # if i % 10000 == 0:
                #     plt.plot(costArray)
                #     plt.xlabel("Epochs")
                #     plt.ylabel("Cost")
                #     plt.show()

    def testNetwork(self, testData, testLabels):
        right = 0
        # This is for testing percentages of the neural network getting it correctly
        for i in range(len(testData)):
            self.loadInputs(testData[i])
            guess = self.feedForward()
            trueValue = testLabels[i]
            correct = int(trueValue[0])
            runningTotal = 0
            for x in guess:
                if x >= runningTotal:
                    runningTotal = x
                    answer = guess.index(x)

            if correct == answer:
                right += 1
            if i >= 9980:
                img = testData[i].reshape((28, 28))
                plt.imshow(img, cmap="Greys")
                print("Right Answer: ", correct)
                print("Guess: ", answer)
                plt.show()
                print("----------------")
        print("Percentage correct:", (right * 100) / len(testData), "%")


# dataset = [[2.7810836, 2.550537003],
#            [1.465489372, 2.362125076],
#            [3.396561688, 4.400293529],
#            [1.38807019, 1.850220317],
#            [3.06407232, 3.005305973],
#            [7.627531214, 2.759262235],
#            [5.332441248, 2.088626775],
#            [6.922596716, 1.77106367],
#            [8.675418651, -0.242068655],
#            [7.673756466, 3.508563011]]
# # trueValue = [[0.01, 0.99],
# #              [0.01, 0.99],
# #              [0.01, 0.99],
# #              [0.01, 0.99],
# #              [0.01, 0.99],
# #              [0.99, 0.01],
# #              [0.99, 0.01],
# #              [0.99, 0.01],
# #              [0.99, 0.01],
# #              [0.99, 0.01]]
# trueValue = [[0, 1],
#              [0, 1],
#              [0, 1],
#              [0, 1],
#              [0, 1],
#              [1, 0],
#              [1, 0],
#              [1, 0],
#              [1, 0],
#              [1, 0]]
# # count = 10
# # costMean = 0
# # costArray = []
# # nn = NeuralNetwork([2, 3, 2], False)
# # good = False
# # while not good:
# #     nn = NeuralNetwork([2, 3, 2], False, "data/weights.txt")
# #     nn.trainNetwork(dataset, trueValue, 10, 0.5)
# #     for i in dataset:
# #         nn.loadInputs(i)
# #         costMean += nn.evaluateCost(nn.feedForward(), trueValue[dataset.index(i)])
# #     costArray.append(costMean)
# #     if costMean <= 0.05:
# #         nn.saveToFile()
# #         good = True
# #     costMean = 0
# #
# # plt.plot(costArray)
# # plt.ylabel("Cost")
# # plt.xlabel("Attempts")
# # plt.show()
#
# nn = NeuralNetwork([2, 3, 2], False, "data/weights.txt")
# nn.trainNetwork(dataset, trueValue, 100000, 1)
# nn.testNetwork(dataset, trueValue)

Вот класс утилит:

import numpy as np
from numba import vectorize


def sigmoid(x):
    # This is to stop overflows in this function
    if x >= 1000:
        return 1
    elif x <= -1000:
        return 0
    return 1 / (1 + np.exp(-x))


def sigmoidDerivative(x):
    return sigmoid(x) * (1 - sigmoid(x))


def outputCostDerivative(answer, trueValue):
    value = []
    # Loops through the outputs and calculates the derivative of the cost function for each of them
    for i in range(len(answer)):
        # This is intended output
        y = trueValue[i]
        # This is the activation of the node
        a = answer[i]
        # This is the summed value of the weights times the inputs added to the bias of the node
        z = sigmoidDerivative(a)
        # This is the derivative of the cost function
        newCost = 2 * (y - a) * sigmoidDerivative(z)
        value.append(newCost)
    return value


def evaluateCost(answer, trueValue):
    value = 0
    for i in range(len(answer)):
        value += (answer[i] - trueValue[i]) ** 2
    return value

Любая помощь?

...