Neural networks from scratch: problem with the fit method when I try to use mini-batch gradient descent
0 votes
09 February 2020

I am trying to implement a neural network with numpy, based on this post: https://medium.com/datadriveninvestor/math-neural-network-from-scratch-in-python-d6da9f29ce65

My problem starts when I try to implement mini-batch gradient descent. The network outputs several predictions for each data point (as many as the batch size) instead of one per data point. I am copying all the relevant code; I cannot find the problem myself, so maybe you can spot what I missed. I believe the problem is in the fit method.

Thanks a lot.

import numpy as np

class Dense:
    def __init__(self, feat_size, out_size):
        # remember weights -> (inp, out) size
        self.weights = (np.random.normal(0, 1, feat_size*out_size)*np.sqrt(2/feat_size)).reshape(feat_size, out_size)
        self.bias = np.random.rand(1, out_size) - 0.5
    def forward(self, input_data):
        self.input = input_data
        self.output = np.dot(self.input, self.weights) + self.bias
        return(self.output)
    # note that backward takes output_der as an argument, which comes from the next layer;
    # the last layer in the network receives the derivative of the loss function
    # with respect to its prediction
    def backward(self, output_der, lr): # output_der = dE/dY
        input_der = np.dot(output_der, self.weights.T)
        weight_der = np.dot(self.input.T.reshape(-1, self.input.shape[0]), output_der)
        # note that der. with respect to bias is output_der
        # updating parameters
        self.weights -= lr*weight_der
        self.bias -= lr*output_der
        return(input_der) # we will need this for the prev. layer(its prev layer's output_der)
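For what it's worth, the forward pass of a single Dense layer gives the shapes I expect on a mini-batch (a quick check, using two XOR samples as the batch):

# quick shape check for Dense.forward on a mini-batch of 2 samples
layer = Dense(2, 3)
batch = np.array([[0., 0.], [1., 1.]])   # (batch_size, feat_size) = (2, 2)
print(layer.forward(batch).shape)        # expect (2, 3): bias (1, 3) broadcasts over the batch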

class ActLayer:
    def __init__(self, act, act_prime):
        self.act = act
        self.act_prime = act_prime

    def forward(self, input_data):
        self.input = input_data
        self.output = self.act(self.input)
        return(self.output)

    # Note that we are not updating anything here;
    # we need this because its result is the output_der for the dense layer.
    # lr is taken as a parameter because the fit method passes it to every layer.
    def backward(self, output_der, lr):
        return(self.act_prime(self.input)*output_der)

def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        return X_batch, y_batch
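Since shuffle_batch returns as soon as it builds the first split, each call gives back a single shuffled mini-batch; the shapes look right to me (a quick check with toy data and batch_size=2):

# sanity check of the shapes returned by shuffle_batch
X_check = np.arange(8).reshape(4, 2).astype(float)   # 4 samples, 2 features
y_check = np.array([[0.], [1.], [1.], [0.]])
X_batch, y_batch = shuffle_batch(X_check, y_check, batch_size=2)
print(X_batch.shape, y_batch.shape)                   # expect (2, 2) (2, 1)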

class Network:
    def __init__(self, loss, loss_prime):
        self.layers = []
        self.loss = loss
        self.loss_prime = loss_prime

    # will add layers to graph 
    def add(self, layer):
        self.layers.append(layer)

    # implementing only the forward pass, for prediction
    def predict(self, input_data):
        result = []
        self.input_data = input_data
        for a in self.input_data:
            layer_output = a
            for layer in self.layers:
                layer_output = layer.forward(layer_output)
            result.append(layer_output)

        return(result)

    # Training
    def fit(self, X_train, y_train, epochs, lr, batch_size):

        for a in range(epochs):

            err = 0

            layer_output, final_out = shuffle_batch(X_train, y_train, batch_size)
            for layer in self.layers:
                layer_output = layer.forward(layer_output)

            err += self.loss(final_out, layer_output)

            # backprop: note that we loop over the layers in reverse order.
            # initially we pass the derivative of the loss with respect to the prediction,
            # which plays the role of output_der for the last layer;
            # each layer then returns its input_der, which is output_der for the previous layer, and so on

            gradient = self.loss_prime(final_out, layer_output)
            # this loop is the reason we gave lr to the activation layer as an argument
            for layer in reversed(self.layers):
                gradient = layer.backward(gradient, lr)

            err /= len(X_train) 
            print('epoch %d/%d   error=%f' % (a+1, epochs, err))
                """
""" 
x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])
x_train = x_train.reshape(-1, 2)
y_train = y_train.reshape(-1, 1)

# network
net = Network(mse, mse_prime)
net.add(Dense(2, 3))
net.add(ActLayer(relu, relu_prime))
net.add(Dense(3, 1))
#net.add(ActLayer(relu, relu_prime))
# train
net.fit(x_train, y_train, epochs=1000, lr=0.1, batch_size=2)

# test
out = net.predict(x_train)
print(out) 
"""

Output of "print":


[array([[-1.01282012e-07],
       [ 7.35202957e-08]]), array([[0.99999994],
       [1.00000026]]), array([[0.99999984],
       [1.00000002]]), array([[ 9.04277717e-08],
       [-2.05733598e-07]])]

It should make one prediction per data point, but it makes batch_size predictions per data point.
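In case it helps to pinpoint the issue: the update I would expect for a mini-batch reduces the per-sample gradients over the batch axis, so the parameter shapes never change with batch_size (a minimal sketch, assuming a sum over axis 0 for the bias term; this is not what Dense.backward above does):

import numpy as np

feat_size, out_size, batch_size = 3, 1, 2
weights = np.random.randn(feat_size, out_size)
bias = np.zeros((1, out_size))

x = np.random.randn(batch_size, feat_size)           # one mini-batch of inputs
output_der = np.random.randn(batch_size, out_size)   # dE/dY from the next layer, one row per sample

weight_der = np.dot(x.T, output_der)                  # (feat_size, out_size)
bias_der = np.sum(output_der, axis=0, keepdims=True)  # (1, out_size), reduced over the batch

weights -= 0.1 * weight_der
bias -= 0.1 * bias_der
print(weights.shape, bias.shape)   # stays (3, 1) (1, 1) regardless of batch_size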
