Я пытаюсь реализовать нейронные сети с numpy, основываясь на этом посте: https://medium.com/datadriveninvestor/math-neural-network-from-scratch-in-python-d6da9f29ce65
Моя проблема начинается, когда я пытаюсь реализовать мини-пакетный градиентный спуск. Сеть выводит несколько прогнозов для каждой точки данных (выводит то же число, что и размер пакета) вместо 1 для каждой точки данных. Я копирую весь соответствующий код, я не могу найти проблему, может быть, вы, ребята, видите то, что я не смог. Я считаю, что проблема в подходящем методе.
Большое спасибо
class Dense:
def __init__(self, feat_size, out_size):
# remember weights -> (inp, out) size
self.weights = (np.random.normal(0, 1, feat_size*out_size)*np.sqrt(2/feat_size)).reshape(feat_size, out_size)
self.bias = np.random.rand(1, out_size) - 0.5
def forward(self, input_data):
self.input = input_data
self.output = np.dot(self.input, self.weights) + self.bias
return(self.output)
# note that backward takes output_der as argument which will come from next layer
# last layer in our network will just take the derivative of loss function with respect to
# its prediction
def backward(self, output_der, lr): # output_der = dE/dY
input_der = np.dot(output_der, self.weights.T)
weight_der = np.dot(self.input.T.reshape(-1, self.input.shape[0]), output_der)
# note that der. with respect to bias is output_der
# updating parameters
self.weights -= lr*weight_der
self.bias -= lr*output_der
return(input_der) # we will need this for the prev. layer(its prev layer's output_der)
class ActLayer:
def __init__(self, act, act_prime):
self.act = act
self.act_prime = act_prime
def forward(self, input_data):
self.input = input_data
self.output = self.act(self.input)
return(self.output)
# Note that we r not updating anything here
# we need this because it will be output_der for dense layer
# we gave lr as parameter because we will define fit method in a way all layers will require it
def backward(self, output_der, lr):
return(self.act_prime(self.input)*output_der)
def shuffle_batch(X, y, batch_size):
rnd_idx = np.random.permutation(len(X))
n_batches = len(X) // batch_size
for batch_idx in np.array_split(rnd_idx, n_batches):
X_batch, y_batch = X[batch_idx], y[batch_idx]
return X_batch, y_batch
class Network:
def __init__(self, loss, loss_prime):
self.layers = []
self.loss = loss
self.loss_prime = loss_prime
# will add layers to graph
def add(self, layer):
self.layers.append(layer)
# implementin only forward-pass for prediction
def predict(self, input_data):
result = []
self.input_data = input_data
for a in self.input_data:
layer_output = a
for layer in self.layers:
layer_output = layer.forward(layer_output)
result.append(layer_output)
return(result)
# Training
def fit(self, X_train, y_train, epochs, lr, batch_size):
for a in range(epochs):
err = 0
layer_output, final_out = shuffle_batch(X_train, y_train, batch_size)
for layer in self.layers:
layer_output = layer.forward(layer_output)
err += self.loss(final_out, layer_output)
# backprop note that looping over layers in reverse order
# initialy we will give derivative of loss with respect to prediction
# which is input_der of loss function correspond to output_der of prev. layer
# then prev layer will output its input_der whic is output_der of prev layer and so on
gradient = self.loss_prime(final_out, layer_output)
# this loop is the reason we gave lr to activation layer as argument
for layer in reversed(self.layers):
gradient = layer.backward(gradient, lr)
err /= len(X_train)
print('epoch %d/%d error=%f' % (a+1, epochs, err))
"""
"""
x_train = np.array([[[0,0]], [[0,1]], [[1,0]], [[1,1]]])
y_train = np.array([[[0]], [[1]], [[1]], [[0]]])
x_train = x_train.reshape(-1, 2)
y_train = y_train.reshape(-1, 1)
# network
net = Network(mse, mse_prime)
net.add(Dense(2, 3))
net.add(ActLayer(relu, relu_prime))
net.add(Dense(3, 1))
#net.add(ActLayer(relu, relu_prime))
# train
net.fit(x_train, y_train, epochs=1000, lr=0.1)
# test
out = net.predict(x_train)
print(out)
"""
Output of "print":
[array([[-1.01282012e-07],
[ 7.35202957e-08]]), array([[0.99999994],
[1.00000026]]), array([[0.99999984],
[1.00000002]]), array([[ 9.04277717e-08],
[-2.05733598e-07]])]
It should make 1 prediction per data point but its making number of batch size predictions per data point