Я создаю нейронную сеть с 3 входами с 2 скрытыми слоями по 5 нейронов каждый и одним выходом
Заранее извините за отсутствие минимального кода;Я не знаю, где возникла проблема
import numpy as np
def sigmoid(x):
# Sigmoid activation function: f(x) = 1 / (1 + e^(-x))
return 1 / (1 + np.exp(-x))
def deriv_sigmoid(x):
# Derivative of sigmoid: f'(x) = f(x) * (1 - f(x))
fx = sigmoid(x)
return fx * (1 - fx)
def mse_loss(y_true, y_pred):
# y_true and y_pred are numpy arrays of the same length.
return ((y_true - y_pred) ** 2).mean()
class OurNeuralNetwork:
def __init__(self):
# Weights
self.w1 = np.random.normal()
self.w2 = np.random.normal()
self.w3 = np.random.normal()
self.w4 = np.random.normal()
self.w5 = np.random.normal()
self.w6 = np.random.normal()
self.w7 = np.random.normal()
self.w8 = np.random.normal()
self.w9 = np.random.normal()
self.w10 = np.random.normal()
self.w11 = np.random.normal()
self.w12 = np.random.normal()
self.w13 = np.random.normal()
self.w14 = np.random.normal()
self.w15 = np.random.normal()
self.w16 = np.random.normal()
self.w17 = np.random.normal()
self.w18 = np.random.normal()
self.w19 = np.random.normal()
self.w20 = np.random.normal()
self.w21 = np.random.normal()
self.w22 = np.random.normal()
self.w23 = np.random.normal()
self.w24 = np.random.normal()
self.w25 = np.random.normal()
self.w26 = np.random.normal()
self.w27 = np.random.normal()
self.w28 = np.random.normal()
self.w29 = np.random.normal()
self.w30 = np.random.normal()
self.w31 = np.random.normal()
self.w32 = np.random.normal()
self.w33 = np.random.normal()
self.w34 = np.random.normal()
self.w35 = np.random.normal()
self.w36 = np.random.normal()
self.w37 = np.random.normal()
self.w38 = np.random.normal()
self.w39 = np.random.normal()
self.w40 = np.random.normal()
self.w41 = np.random.normal()
self.w42 = np.random.normal()
self.w43 = np.random.normal()
self.w44 = np.random.normal()
self.w45 = np.random.normal()
# Biases
self.b1 = np.random.normal()
self.b2 = np.random.normal()
self.b3 = np.random.normal()
self.b4 = np.random.normal()
self.b5 = np.random.normal()
self.b6 = np.random.normal()
self.b7 = np.random.normal()
self.b8 = np.random.normal()
self.b9 = np.random.normal()
self.b10 = np.random.normal()
self.b11 = np.random.normal()
def feedforward(self, x):
# x is a numpy array with 2 elements.
y = []
h1 = sigmoid(self.w1 * x[0] + self.w2 * x[1] + self.w3 * x[2] + self.b1)
y.append(h1)
h2 = sigmoid(self.w4 * x[0] + self.w4 * x[1] + self.w6 * x[2] + self.b2)
y.append(h2)
h3 = sigmoid(self.w7 * x[0] + self.w8 * x[1] + self.w9 * x[2] + self.b3)
y.append(h3)
h4 = sigmoid(self.w10 * x[0] + self.w11 * x[1] + self.w12 * x[2] + self.b4)
y.append(h4)
h5 = sigmoid(self.w13 * x[0] + self.w14 * x[1] + self.w15 * x[2] + self.b5)
y.append(h5)
y = np.array(y)
return y
def feedforward2(self, y):
hi1 = sigmoid(self.w16 * y[0] + self.w17 * y[1] + self.w18 * y[2] + self.w19 * y[3]+ self.w20 * y[4] + self.b6)
hi2 = sigmoid(self.w21 * y[0] + self.w22 * y[1] + self.w23 * y[2] + self.w24 * y[3]+ self.w25 * y[4] + self.b7)
hi3 = sigmoid(self.w26 * y[0] + self.w27 * y[1] + self.w28 * y[2] + self.w29 * y[3]+ self.w30 * y[4] + self.b8)
hi4 = sigmoid(self.w31 * y[0] + self.w32 * y[1] + self.w33 * y[2] + self.w34 * y[3]+ self.w35 * y[4] + self.b9)
hi5 = sigmoid(self.w36 * y[0] + self.w37 * y[1] + self.w38 * y[2] + self.w39 * y[3]+ self.w40 * y[4] + self.b10)
o1 = sigmoid(self.w41 * hi1 + self.w42 * hi2 + self.w43 * hi3 + self.w44 * hi4 + self.w45 * hi5 + self.b11)
return o1
def train(self, data, all_y_trues):
'''
- data is a (n x 3) numpy array, n = # of samples in the dataset.
- all_y_trues is a numpy array with n elements.
Elements in all_y_trues correspond to those in data.
'''
learn_rate = 0.1
epochs = 1000 # number of times to loop through the entire dataset
for epoch in range(epochs):
for x, y_true in zip(data, all_y_trues):
# --- Do a feedforward (we'll need these values later)
h1 = sigmoid(self.w1 * x[0] + self.w2 * x[1] + self.w3 * x[2] + self.b1)
h2 = sigmoid(self.w4 * x[0] + self.w4 * x[1] + self.w6 * x[2] + self.b2)
h3 = sigmoid(self.w7 * x[0] + self.w8 * x[1] + self.w9 * x[2] + self.b3)
h4 = sigmoid(self.w10 * x[0] + self.w11 * x[1] + self.w12 * x[2] + self.b4)
h5 = sigmoid(self.w13 * x[0] + self.w14 * x[1] + self.w15 * x[2] + self.b5)
sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.w3 * x[2] + self.b1
sum_h2 = self.w4 * x[0] + self.w4 * x[1] + self.w6 * x[2] + self.b2
sum_h3 = self.w7 * x[0] + self.w8 * x[1] + self.w9 * x[2] + self.b3
sum_h4 = self.w10 * x[0] + self.w11 * x[1] + self.w12 * x[2] + self.b4
sum_h5 = self.w13 * x[0] + self.w14 * x[1] + self.w15 * x[2] + self.b5
z = feedforward(self,x)
hi1 = sigmoid(self.w16 * z[0] + self.w17 * z[1] + self.w18 * z[2] + self.w19 * z[3]+ self.w20 * z[4] + self.b6)
hi2 = sigmoid(self.w21 * z[0] + self.w22 * z[1] + self.w23 * z[2] + self.w24 * z[3]+ self.w25 * z[4] + self.b7)
hi3 = sigmoid(self.w26 * z[0] + self.w27 * z[1] + self.w28 * z[2] + self.w29 * z[3]+ self.w30 * z[4] + self.b8)
hi4 = sigmoid(self.w31 * z[0] + self.w32 * z[1] + self.w33 * z[2] + self.w34 * z[3]+ self.w35 * z[4] + self.b9)
hi5 = sigmoid(self.w36 * z[0] + self.w37 * z[1] + self.w38 * z[2] + self.w39 * z[3]+ self.w40 * z[4] + self.b10)
sum_hi1 = self.w16 * z[0] + self.w17 * z[1] + self.w18 * z[2] + self.w19 * z[3]+ self.w20 * z[4] + self.b6
sum_hi2 = self.w21 * z[0] + self.w22 * z[1] + self.w23 * z[2] + self.w24 * z[3]+ self.w25 * z[4] + self.b7
sum_hi3 = self.w26 * z[0] + self.w27 * z[1] + self.w28 * z[2] + self.w29 * z[3]+ self.w30 * z[4] + self.b8
sum_hi4 = self.w31 * z[0] + self.w32 * z[1] + self.w33 * z[2] + self.w34 * z[3]+ self.w35 * z[4] + self.b9
sum_hi5 = self.w36 * z[0] + self.w37 * z[1] + self.w38 * z[2] + self.w39 * z[3]+ self.w40 * z[4] + self.b10
o1 = feedforward(self,z)
sum_o1 = self.w41 * hi1 + self.w42 * hi2 + self.w43 * hi3 + self.w44 * hi4 + self.w45 * hi5 + self.b11
y_pred = o1
# --- Calculate partial derivatives.
# --- Naming: d_L_d_w1 represents "partial L / partial w1"
d_L_d_ypred = -2 * (y_true - y_pred)
# Neuron o1 NEW
d_ypred_d_w41 = hi1 * deriv_sigmoid(sum_o1)
d_ypred_d_w42 = hi2 * deriv_sigmoid(sum_o1)
d_ypred_d_w43 = hi3 * deriv_sigmoid(sum_o1)
d_ypred_d_w44 = hi4 * deriv_sigmoid(sum_o1)
d_ypred_d_w45 = hi5 * deriv_sigmoid(sum_o1)
d_ypred_d_b11 = deriv_sigmoid(sum_o1)
d_ypred_d_hi1 = self.w41 * deriv_sigmoid(sum_o1)
d_ypred_d_hi2 = self.w42 * deriv_sigmoid(sum_o1)
d_ypred_d_hi3 = self.w43 * deriv_sigmoid(sum_o1)
d_ypred_d_hi4 = self.w44 * deriv_sigmoid(sum_o1)
d_ypred_d_hi5 = self.w45 * deriv_sigmoid(sum_o1)
d_ypred_d_h1 = deriv_sigmoid(sum_o1) * (self.w41 * deriv_sigmoid(sum_hi1) * self.w16 + self.w42 * deriv_sigmoid(sum_hi2) * self.w21 + self.w43 * deriv_sigmoid(sum_hi3) * self.w26 + self.w44 * deriv_sigmoid(sum_hi4) * self.w31 + self.w45 * deriv_sigmoid(sum_hi5) * self.w36)
d_ypred_d_h2 = deriv_sigmoid(sum_o1) * (self.w41 * deriv_sigmoid(sum_hi1) * self.w17 + self.w42 * deriv_sigmoid(sum_hi2) * self.w22 + self.w43 * deriv_sigmoid(sum_hi3) * self.w27 + self.w44 * deriv_sigmoid(sum_hi4) * self.w32 + self.w45 * deriv_sigmoid(sum_hi5) * self.w37)
d_ypred_d_h3 = deriv_sigmoid(sum_o1) * (self.w41 * deriv_sigmoid(sum_hi1) * self.w18 + self.w42 * deriv_sigmoid(sum_hi2) * self.w23 + self.w43 * deriv_sigmoid(sum_hi3) * self.w28 + self.w44 * deriv_sigmoid(sum_hi4) * self.w33 + self.w45 * deriv_sigmoid(sum_hi5) * self.w38)
d_ypred_d_h4 = deriv_sigmoid(sum_o1) * (self.w41 * deriv_sigmoid(sum_hi1) * self.w19 + self.w42 * deriv_sigmoid(sum_hi2) * self.w24 + self.w43 * deriv_sigmoid(sum_hi3) * self.w29 + self.w44 * deriv_sigmoid(sum_hi4) * self.w34 + self.w45 * deriv_sigmoid(sum_hi5) * self.w39)
d_ypred_d_h5 = deriv_sigmoid(sum_o1) * (self.w41 * deriv_sigmoid(sum_hi1) * self.w20 + self.w42 * deriv_sigmoid(sum_hi2) * self.w25 + self.w43 * deriv_sigmoid(sum_hi3) * self.w30 + self.w44 * deriv_sigmoid(sum_hi4) * self.w35 + self.w45 * deriv_sigmoid(sum_hi5) * self.w40)
# Neuron hi1 NEW
d_hi1_d_w16 = h1 * deriv_sigmoid(sum_hi1)
d_hi1_d_w17 = h2 * deriv_sigmoid(sum_hi1)
d_hi1_d_w18 = h3 * deriv_sigmoid(sum_hi1)
d_hi1_d_w19 = h4 * deriv_sigmoid(sum_hi1)
d_hi1_d_w20 = h5 * deriv_sigmoid(sum_hi1)
d_hi1_d_b6 = deriv_sigmoid(sum_hi1)
# Neuron hi2 NEW
d_hi2_d_w21 = h1 * deriv_sigmoid(sum_hi2)
d_hi2_d_w22 = h2 * deriv_sigmoid(sum_hi2)
d_hi2_d_w23 = h3 * deriv_sigmoid(sum_hi2)
d_hi2_d_w24 = h4 * deriv_sigmoid(sum_hi2)
d_hi2_d_w25 = h5 * deriv_sigmoid(sum_hi2)
d_hi2_d_b7 = deriv_sigmoid(sum_hi2)
# Neuron hi3 NEW
d_hi3_d_w26 = h1 * deriv_sigmoid(sum_hi3)
d_hi3_d_w27 = h2 * deriv_sigmoid(sum_hi3)
d_hi3_d_w28 = h3 * deriv_sigmoid(sum_hi3)
d_hi3_d_w29 = h4 * deriv_sigmoid(sum_hi3)
d_hi3_d_w30 = h5 * deriv_sigmoid(sum_hi3)
d_hi3_d_b8 = deriv_sigmoid(sum_hi3)
# Neuron hi4 NEW
d_hi4_d_w31 = h1 * deriv_sigmoid(sum_hi4)
d_hi4_d_w32 = h2 * deriv_sigmoid(sum_hi4)
d_hi4_d_w33 = h3 * deriv_sigmoid(sum_hi4)
d_hi4_d_w34 = h4 * deriv_sigmoid(sum_hi4)
d_hi4_d_w35 = h5 * deriv_sigmoid(sum_hi4)
d_hi4_d_b9 = deriv_sigmoid(sum_hi4)
#Neuron hi5 NEW
d_hi5_d_w36 = h1 * deriv_sigmoid(sum_hi5)
d_hi5_d_w37 = h2 * deriv_sigmoid(sum_hi5)
d_hi5_d_w38 = h3 * deriv_sigmoid(sum_hi5)
d_hi5_d_w39 = h4 * deriv_sigmoid(sum_hi5)
d_hi5_d_w40 = h5 * deriv_sigmoid(sum_hi5)
d_hi5_d_b10 = deriv_sigmoid(sum_hi5)
#Neuron h1 NEW
d_h1_d_w1 = x[0] * deriv_sigmoid(sum_h1)
d_h1_d_w2 = x[1] * deriv_sigmoid(sum_h1)
d_h1_d_w3 = x[2] * deriv_sigmoid(sum_h1)
d_h1_d_b1 = deriv_sigmoid(sum_h1)
#Neuron h2 NEW
d_h2_d_w4 = x[0] * deriv_sigmoid(sum_h2)
d_h2_d_w5 = x[1] * deriv_sigmoid(sum_h2)
d_h2_d_w6 = x[2] * deriv_sigmoid(sum_h2)
d_h2_d_b2 = deriv_sigmoid(sum_h2)
#Neuron h3 NEW
d_h3_d_w7 = x[0] * deriv_sigmoid(sum_h3)
d_h3_d_w8 = x[1] * deriv_sigmoid(sum_h3)
d_h3_d_w9 = x[2] * deriv_sigmoid(sum_h3)
d_h3_d_b3 = deriv_sigmoid(sum_h3)
#Neuron h4 NEW
d_h4_d_w10 = x[0] * deriv_sigmoid(sum_h4)
d_h4_d_w11 = x[1] * deriv_sigmoid(sum_h4)
d_h4_d_w12 = x[2] * deriv_sigmoid(sum_h4)
d_h4_d_b4 = deriv_sigmoid(sum_h4)
#Neuron h5 NEW
d_h5_d_w13 = x[0] * deriv_sigmoid(sum_h5)
d_h5_d_w14 = x[1] * deriv_sigmoid(sum_h5)
d_h5_d_w15 = x[2] * deriv_sigmoid(sum_h5)
d_h5_d_b5 = deriv_sigmoid(sum_h5)
# --- Update weights and biases
# Neuron h1
self.w1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w1
self.w2 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w2
self.w3 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w3
self.b1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_b1
# Neuron h2
self.w4 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w4
self.w5 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w5
self.w6 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w6
self.b2 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_b2
# Neuron h3
self.w7 -= learn_rate * d_L_d_ypred * d_ypred_d_h3 * d_h3_d_w7
self.w8 -= learn_rate * d_L_d_ypred * d_ypred_d_h3 * d_h3_d_w8
self.w9 -= learn_rate * d_L_d_ypred * d_ypred_d_h3 * d_h3_d_w9
self.b3 -= learn_rate * d_L_d_ypred * d_ypred_d_h3 * d_h3_d_b3
# Neuron h4
self.w10 -= learn_rate * d_L_d_ypred * d_ypred_d_h4 * d_h4_d_w10
self.w11 -= learn_rate * d_L_d_ypred * d_ypred_d_h4 * d_h4_d_w11
self.w12 -= learn_rate * d_L_d_ypred * d_ypred_d_h4 * d_h4_d_w12
self.b4 -= learn_rate * d_L_d_ypred * d_ypred_d_h4 * d_h4_d_b4
# Neuron h5
self.w13 -= learn_rate * d_L_d_ypred * d_ypred_d_h5 * d_h5_d_w13
self.w14 -= learn_rate * d_L_d_ypred * d_ypred_d_h5 * d_h5_d_w14
self.w15 -= learn_rate * d_L_d_ypred * d_ypred_d_h5 * d_h5_d_w15
self.b5 -= learn_rate * d_L_d_ypred * d_ypred_d_h5 * d_h5_d_b5
# Neuron hi1
self.w16 -= learn_rate * d_L_d_ypred * d_ypred_d_hi1 * d_hi1_d_w16
self.w17 -= learn_rate * d_L_d_ypred * d_ypred_d_hi1 * d_hi1_d_w17
self.w18 -= learn_rate * d_L_d_ypred * d_ypred_d_hi1 * d_hi1_d_w18
self.w19 -= learn_rate * d_L_d_ypred * d_ypred_d_hi1 * d_hi1_d_w19
self.w20 -= learn_rate * d_L_d_ypred * d_ypred_d_hi1 * d_hi1_d_w20
self.b6 -= learn_rate * d_L_d_ypred * d_ypred_d_hi1 * d_hi1_d_b6
# Neuron hi2
self.w21 -= learn_rate * d_L_d_ypred * d_ypred_d_hi2 * d_hi2_d_w21
self.w22 -= learn_rate * d_L_d_ypred * d_ypred_d_hi2 * d_hi2_d_w22
self.w23 -= learn_rate * d_L_d_ypred * d_ypred_d_hi1 * d_hi2_d_w23
self.w24 -= learn_rate * d_L_d_ypred * d_ypred_d_hi2 * d_hi2_d_w24
self.w25 -= learn_rate * d_L_d_ypred * d_ypred_d_hi2 * d_hi2_d_w25
self.b7 -= learn_rate * d_L_d_ypred * d_ypred_d_hi2 * d_hi2_d_b7
# Neuron hi3
self.w26 -= learn_rate * d_L_d_ypred * d_ypred_d_hi3 * d_hi3_d_w26
self.w27 -= learn_rate * d_L_d_ypred * d_ypred_d_hi3 * d_hi3_d_w27
self.w28 -= learn_rate * d_L_d_ypred * d_ypred_d_hi3 * d_hi3_d_w28
self.w29 -= learn_rate * d_L_d_ypred * d_ypred_d_hi3 * d_hi3_d_w29
self.w30 -= learn_rate * d_L_d_ypred * d_ypred_d_hi3 * d_hi3_d_w30
self.b8 -= learn_rate * d_L_d_ypred * d_ypred_d_hi3 * d_hi3_d_b8
# Neuron hi4
self.w31 -= learn_rate * d_L_d_ypred * d_ypred_d_hi4 * d_hi4_d_w31
self.w32 -= learn_rate * d_L_d_ypred * d_ypred_d_hi4 * d_hi4_d_w32
self.w33 -= learn_rate * d_L_d_ypred * d_ypred_d_hi4 * d_hi4_d_w33
self.w34 -= learn_rate * d_L_d_ypred * d_ypred_d_hi4 * d_hi4_d_w34
self.w35 -= learn_rate * d_L_d_ypred * d_ypred_d_hi4 * d_hi4_d_w35
self.b9 -= learn_rate * d_L_d_ypred * d_ypred_d_hi4 * d_hi4_d_b9
# Neuron hi5 NEW
self.w36 -= learn_rate * d_L_d_ypred * d_ypred_d_hi5 * d_hi5_d_w36
self.w37 -= learn_rate * d_L_d_ypred * d_ypred_d_hi5 * d_hi5_d_w37
self.w38 -= learn_rate * d_L_d_ypred * d_ypred_d_hi5 * d_hi5_d_w38
self.w39 -= learn_rate * d_L_d_ypred * d_ypred_d_hi5 * d_hi5_d_w39
self.w40 -= learn_rate * d_L_d_ypred * d_ypred_d_hi5 * d_hi5_d_w40
self.b10 -= learn_rate * d_L_d_ypred * d_ypred_d_hi5 * d_hi5_d_b10
# Neuron o1 NEW
self.w41 -= learn_rate * d_L_d_ypred * d_ypred_d_w41
self.w42 -= learn_rate * d_L_d_ypred * d_ypred_d_w42
self.w43 -= learn_rate * d_L_d_ypred * d_ypred_d_w43
self.w44 -= learn_rate * d_L_d_ypred * d_ypred_d_w44
self.w45 -= learn_rate * d_L_d_ypred * d_ypred_d_w45
self.b11 -= learn_rate * d_L_d_ypred * d_ypred_d_b11
# --- Calculate total loss at the end of each epoch
if epoch % 10 == 0:
firstlayer = np.apply_along_axis(self.feedforward, 1, data)
y_preds = np.apply_along_axis(self.feedforward2, 1, firstlayer)
loss = mse_loss(all_y_trues, y_preds)
print("Epoch %d loss: %.3f" % (epoch, loss))
# Define dataset
data = np.array([
[-2, -1, 16], # Alice
[25, 6, -5], # Bob
[17, 4, 20], # Charlie
[-15, -6 ,7], # Diana
])
all_y_trues = np.array([
1, # Alice
0, # Bob
0, # Charlie
1, # Diana
])
# Train our neural network!
network = OurNeuralNetwork()
network.train(data, all_y_trues)
ValueError Traceback (последний последний вызов) в 338 # Обучите нашу нейронную сеть! 339 network = OurNeuralNetwork () -> 340 network.train (data, all_y_trues)
в поезде (self, data, all_y_trues) 238 # --- Обновление весов и смещений 239 # Нейрон h1 -> 240self.w1 - = learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w1 241 self.w2 - = learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w2 242 self.w3 - = многопользовательскийоперанд с формой (5,) не соответствует форме трансляции (5,5) В []: