Why does my neural network output the same 0.5 for an XOR gate?
1 vote
/ 07 November 2019

I'm looking for help with my neural network, as it always converges to 0.5. The network has 2 inputs, a hidden layer of 2 neurons, and a single output neuron. It uses the scikit-cuda library for matrix multiplication, NumPy for the matrices, and the sigmoid activation function. I've looked through other posts but couldn't find a solution. I appreciate any help.
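
For comparison, here is a minimal NumPy-only sketch of the same 2-2-1 sigmoid architecture trained with plain backpropagation. This is only a reference sketch, not my scikit-cuda code: the names, the learning rate of 0.5, and the seed are arbitrary choices, and with most seeds it converges to the XOR targets.

import numpy as np

#minimal 2-2-1 sigmoid network for XOR, plain backpropagation
rng = np.random.default_rng(0)
X = np.array([[1.,1.],[0.,0.],[0.,1.],[1.,0.]])  #inputs
T = np.array([[0.],[0.],[1.],[1.]])              #XOR targets
W1 = rng.uniform(-1, 1, (2, 3))  #hidden weights, bias in the last column
W2 = rng.uniform(-1, 1, (1, 3))  #output weights, bias in the last column

def sig(z):
    return 1.0 / (1.0 + np.exp(-z))

lr = 0.5
for _ in range(5000):
    for x, t in zip(X, T):
        a0 = np.append(x, 1.0)        #input plus bias
        z1 = W1 @ a0
        a1 = np.append(sig(z1), 1.0)  #hidden activations plus bias
        a2 = sig(W2 @ a1)
        d2 = (a2 - t) * a2 * (1 - a2)                      #output delta
        d1 = (W2[:, :2].T @ d2) * sig(z1) * (1 - sig(z1))  #hidden delta
        W2 -= lr * np.outer(d2, a1)
        W1 -= lr * np.outer(d1, a0)

for x in X:
    a1 = np.append(sig(W1 @ np.append(x, 1.0)), 1.0)
    print(x, sig(W2 @ a1))  #should approach 0, 0, 1, 1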

Neural.py

import pycuda.autoinit
from pycuda import gpuarray
import numpy as np
from skcuda import linalg, misc
import math
linalg.init()

#TODO: move as many calculations as possible to the GPU with skcuda.misc

class properties:
    def __init__(self):
        self.sig = "sig"
        self.relu = "relu"
        self.gradientdescent = "gradientdescent"
        self.naturalselection = "naturalselection"

class NeuralNet:
    def __init__(self,inputcount):
        self.trainindata = None
        self.trainoutdata = None
        self.inputcount = inputcount
        self.layerprimus = None

    def addLayer(self,size,func):
        if self.layerprimus is None:
            self.layerprimus = Layer(func,self.inputcount,size)
        else:
            self.layerprimus.addlayer(size,func)
        print("added new ", func," layer")

    def compute(self,indata):
        return self.layerprimus.fowardrun(indata)

    def correct(self,outdata,learnrate,momentum):
        self.layerprimus.update(outdata,learnrate,momentum)

    def train(self,trainIn,trainOut,epoch,learnrate,momentum):
        self.trainindata = trainIn
        self.trainoutdata = trainOut
        for i in range(epoch):
            if (i == (epoch - 1)):
                print("")
            for o in range(len(trainIn)):
                temp = self.compute(trainIn[o])
                self.correct(trainOut[o],learnrate,momentum)
        print("traing done")

    def error(self):
        #half the sum of squared errors over the whole training set;
        #range(out.size-1) below skips the trailing bias entry of each output
        error = 0.0
        for o in range(len(self.trainindata)):
            out = self.compute(self.trainindata[o])
            for i in range(out.size-1):
                error += (out[i][0]-self.trainoutdata[o][i])**2 
        print("error = ", error/2)

class Layer:
    def __init__(self,func,inputcount,count = 0):
        self.inputcount = inputcount
        self.count = count
        self.func = func
        self.biasStart = 1.0
        #link to the next layer, set recursively
        self.adjlayer = None
        self.isadjlayer = False
        #the neurons are stored in a matrix
        self.neurons = None
        #the momentum terms have their own matrix
        self.mterm = None
        #the weight matrix is only allocated once the layer size is known
        if count != 0:
            #sets weights to between -1 and 1
            self.neurons = matrix(count+1,inputcount+1,True)
            self.neurons = scalemultiply(2,self.neurons)
            self.neurons = matrixadd(-1,self.neurons)
            self.mterm = matrix(count+1,inputcount+1)
            #this loop fixes the last row so it passes the bias input through unchanged
            for i in range(inputcount):
                self.neurons[count][i] = 0.0
            self.neurons[count][inputcount] = 1.0
        self.output = None
        self.input = None
        self.Z = None
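        #layout note: self.neurons is (count+1) x (inputcount+1); row i < count
        #holds neuron i's weights with its bias in the last column, while the
        #extra last row is fixed to [0, ..., 0, 1] so the trailing 1.0 of every
        #input/output column vector is carried forward as the bias input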

    def addlayer(self,size,func):
        if not self.isadjlayer:
            self.isadjlayer = True
            self.adjlayer = Layer(func,self.count,size)
        else:
            self.adjlayer.addlayer(size,func)

    def fowardrun(self,indata):
        self.input = indata
        self.output = matrixdot(self.neurons,indata)
        self.Z = self.output
        for i in range(self.count):
            self.output[i][0] = actfunc(self.func,self.output[i][0])
        if not self.isadjlayer:
            return self.output
        return self.adjlayer.fowardrun(self.output)

    def update(self,trainout,learnrate,momentum):
        currentD = matrix(self.inputcount+1,1)
        currentD[self.inputcount][0] = 1.0
        if not self.isadjlayer:
            #output layer: derivative of the cost with respect to each output neuron
            IcostD = matrix(self.count+1,1)
            IcostD[self.count][0] = 1
            for i in range(self.count):
                IcostD[i][0] = (self.output[i][0] - trainout[i])
        else:
            #hidden layer: recurse into the next layer first; it returns the
            #partial derivative of the cost with respect to this layer's output
            IcostD = self.adjlayer.update(trainout,learnrate,momentum)
        #derivative of the activation function at this layer's stored values
        ActD = matrix(self.count+1,1)
        ActD[self.count][0] = 1.0
        for i in range(self.count):
            ActD[i][0] = actfunc(self.func,self.Z[i][0],True)
        #multiplies the activation derivatives by the cost-to-output derivatives
        ActD = matrixdot(squarenpary(IcostD),ActD)
        ActD[self.count][0] = 1.0
        #update each neuron's weights according to its individual inputs
        for i in range(self.count):
            for o in range(self.inputcount):
                change = (-learnrate * ActD[i][0] * self.input[o][0])
                self.neurons[i][o] += change + self.mterm[i][o]
                self.mterm[i][o] = change + (self.mterm[i][o] * momentum)
        #updates the bias of each neuron
        for i in range(self.count):
            change = (-learnrate * ActD[i][0])
            self.neurons[i][self.inputcount] += change + self.mterm[i][self.inputcount]
            self.mterm[i][self.inputcount] = change + (self.mterm[i][self.inputcount] * momentum)
        #builds currentD to return to the previous layer of neurons
        for i in range(self.inputcount):
            num = 0.0
            for o in range(self.count):
                num += self.neurons[o][i] * ActD[o][0]
            currentD[i][0] = num
        #returns the cost-to-activation partial derivative for the previous layer
        return currentD

#function definitions used by the classes
#turns a one-column matrix into a square matrix with its values on the diagonal
def squarenpary(ary):
    new = matrix(ary.shape[0],ary.shape[0])
    for i in range(ary.shape[0]):
        new[i][i] = ary[i]
    return new
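#note: matrixdot(squarenpary(a), b) is therefore the elementwise product of two
#column vectors a and b, e.g. a = [[2],[3]], b = [[4],[5]] gives [[8],[15]]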
#scales a matrix by a scalar on the GPU
def scalemultiply(x,y):
    r = gpuarray.to_gpu(y)
    linalg.scale(x,r)
    return r.get()
#builds a matrix: random, from a given list, or zero-filled
def matrix(x, y,isrand = False,ary = []):
    if isrand:
        return np.random.rand(x, y).astype(np.float64)
    elif not islistNull(ary):
        return np.array(ary).astype(np.float64)
    else:
        return np.zeros(shape = (x,y)).astype(np.float64)
#GPU dot multiplication of matrices
def matrixdot(x,y):
    X = gpuarray.to_gpu(x)
    Y = gpuarray.to_gpu(y)
    D = linalg.dot(X,Y)
    return D.get()
#returns a matrix on the GPU
def gpuary(x, y,isrand = False,ary = []):
    if isrand:
        return gpuarray.to_gpu(np.random.rand(x, y).astype(np.float64))
    elif not islistNull(ary):
        return gpuarray.to_gpu(np.array(ary).astype(np.float64))
    else:
        return gpuarray.to_gpu(np.zeros(shape = (x,y)).astype(np.float64))
#returns whether a list, matrix, or array is empty
def islistNull(list1):
    return len(list1) == 0
#dispatches to the chosen activation function, or to its derivative
def actfunc(func,x,d=False):
    if func == "sig":
        return sigfunc(x,d)
    if func == "relu":
        return relufunc(x,d)
#the sigmoid function, with the option of its derivative
def sigfunc(x,d = False):
    if d == False:
        return 1.0 / (1.0+math.e**(-x))
    else:
        num = sigfunc(x)
        return num * (1.0-num)
#the ReLU function, with the option of its derivative
def relufunc(x,d = False):
    if d == False:
        return max(0.0,x)
    else:
        if x > 0.0:
            return 1.0
        else:
            return 0.0
#adds the constant x to every element of the matrix y
def matrixadd(x,y):
    #body was cut off in the post; y + x matches its use in Layer.__init__
    return y + x

main.py

from Neural import NeuralNet, properties, np

#training data: XOR inputs as column vectors; the trailing 1.0 is the bias input
C1 = np.array([[1.0],[1.0],[1.0]])
C2 = np.array([[0.0],[0.0],[1.0]])
C3 = np.array([[0.0],[1.0],[1.0]])
C4 = np.array([[1.0],[0.0],[1.0]])

R1 = np.array([0.0])
R2 = np.array([0.0])
R3 = np.array([1.0])
R4 = np.array([1.0])

trainIn = [C1,C2,C3,C4]
trainOut = [R1,R2,R3,R4]

#properties
props = properties()

#configuration
Neuralnetwork = NeuralNet(2)
Neuralnetwork.addLayer(2,props.sig)
Neuralnetwork.addLayer(1,props.sig)

#train and test
print(Neuralnetwork.compute(C1))
print(Neuralnetwork.compute(C2))
print(Neuralnetwork.compute(C3))
print(Neuralnetwork.compute(C4))
#arguments: input data, output data, epochs, learning rate, momentum constant
Neuralnetwork.train(trainIn,trainOut,5000,.000001,.2)
print(Neuralnetwork.compute(C1))
print(Neuralnetwork.compute(C2))
print(Neuralnetwork.compute(C3))
print(Neuralnetwork.compute(C4))
Neuralnetwork.error()

OUTPUT

added new  sig  layer
added new  sig  layer
[[0.78769443]
 [1.        ]]
[[0.80761663]
 [1.        ]]
[[0.78050864]
 [1.        ]]
[[0.8144971]
 [1.       ]]

training done
[[0.51667434]
 [1.        ]]
[[0.49184849]
 [1.        ]]
[[0.50271455]
 [1.        ]]
[[0.50677967]
 [1.        ]]
error =  0.4997132119269929
...