Неразмещаемый выходной операнд с формой (5,) не соответствует форме трансляции (5,5) - PullRequest
0 голосов
/ 11 ноября 2019

Я создаю нейронную сеть с 3 входами с 2 скрытыми слоями по 5 нейронов каждый и одним выходом

Заранее извините за отсутствие минимального кода;Я не знаю, где возникла проблема

import numpy as np

def sigmoid(x):
    # Sigmoid activation function: f(x) = 1 / (1 + e^(-x))
    return 1 / (1 + np.exp(-x))

def deriv_sigmoid(x):
    # Derivative of sigmoid: f'(x) = f(x) * (1 - f(x))
    fx = sigmoid(x)
    return fx * (1 - fx)

def mse_loss(y_true, y_pred):
    # y_true and y_pred are numpy arrays of the same length.
    return ((y_true - y_pred) ** 2).mean()

class OurNeuralNetwork:
    def __init__(self):
        # Weights
        self.w1 = np.random.normal()
        self.w2 = np.random.normal()
        self.w3 = np.random.normal()
        self.w4 = np.random.normal()
        self.w5 = np.random.normal()
        self.w6 = np.random.normal()
        self.w7 = np.random.normal()
        self.w8 = np.random.normal()
        self.w9 = np.random.normal()
        self.w10 = np.random.normal()
        self.w11 = np.random.normal()
        self.w12 = np.random.normal()
        self.w13 = np.random.normal()
        self.w14 = np.random.normal()
        self.w15 = np.random.normal()
        self.w16 = np.random.normal()
        self.w17 = np.random.normal()
        self.w18 = np.random.normal()
        self.w19 = np.random.normal()
        self.w20 = np.random.normal()
        self.w21 = np.random.normal()
        self.w22 = np.random.normal()
        self.w23 = np.random.normal()
        self.w24 = np.random.normal()
        self.w25 = np.random.normal()
        self.w26 = np.random.normal()
        self.w27 = np.random.normal()
        self.w28 = np.random.normal()
        self.w29 = np.random.normal()
        self.w30 = np.random.normal()
        self.w31 = np.random.normal()
        self.w32 = np.random.normal()
        self.w33 = np.random.normal()
        self.w34 = np.random.normal()
        self.w35 = np.random.normal()
        self.w36 = np.random.normal()
        self.w37 = np.random.normal()
        self.w38 = np.random.normal()
        self.w39 = np.random.normal()
        self.w40 = np.random.normal()
        self.w41 = np.random.normal()
        self.w42 = np.random.normal()
        self.w43 = np.random.normal()
        self.w44 = np.random.normal()
        self.w45 = np.random.normal()




        # Biases
        self.b1 = np.random.normal()
        self.b2 = np.random.normal()
        self.b3 = np.random.normal()
        self.b4 = np.random.normal()
        self.b5 = np.random.normal()
        self.b6 = np.random.normal()
        self.b7 = np.random.normal()
        self.b8 = np.random.normal()
        self.b9 = np.random.normal()
        self.b10 = np.random.normal()
        self.b11 = np.random.normal()

    def feedforward(self, x):
        # x is a numpy array with 2 elements.
        y = []
        h1 = sigmoid(self.w1 * x[0] + self.w2 * x[1] + self.w3 * x[2] + self.b1)
        y.append(h1)
        h2 = sigmoid(self.w4 * x[0] + self.w4 * x[1] + self.w6 * x[2] + self.b2)
        y.append(h2)
        h3 = sigmoid(self.w7 * x[0] + self.w8 * x[1] + self.w9 * x[2] + self.b3)
        y.append(h3)
        h4 = sigmoid(self.w10 * x[0] + self.w11 * x[1] + self.w12 * x[2] + self.b4)
        y.append(h4)
        h5 = sigmoid(self.w13 * x[0] + self.w14 * x[1] + self.w15 * x[2] + self.b5)
        y.append(h5)
        y = np.array(y)
        return y
    def feedforward2(self, y):
        hi1 = sigmoid(self.w16 * y[0] + self.w17 * y[1] + self.w18 * y[2] + self.w19 * y[3]+ self.w20 * y[4] + self.b6)
        hi2 = sigmoid(self.w21 * y[0] + self.w22 * y[1] + self.w23 * y[2] + self.w24 * y[3]+ self.w25 * y[4] + self.b7)
        hi3 = sigmoid(self.w26 * y[0] + self.w27 * y[1] + self.w28 * y[2] + self.w29 * y[3]+ self.w30 * y[4] + self.b8)
        hi4 = sigmoid(self.w31 * y[0] + self.w32 * y[1] + self.w33 * y[2] + self.w34 * y[3]+ self.w35 * y[4] + self.b9)
        hi5 = sigmoid(self.w36 * y[0] + self.w37 * y[1] + self.w38 * y[2] + self.w39 * y[3]+ self.w40 * y[4] + self.b10)
        o1 = sigmoid(self.w41 * hi1 + self.w42 * hi2 + self.w43 * hi3 + self.w44 * hi4 + self.w45 * hi5 + self.b11)
        return o1

    def train(self, data, all_y_trues):
        '''
        - data is a (n x 3) numpy array, n = # of samples in the dataset.
        - all_y_trues is a numpy array with n elements.
          Elements in all_y_trues correspond to those in data.
        '''
        learn_rate = 0.1
        epochs = 1000 # number of times to loop through the entire dataset

        for epoch in range(epochs):
            for x, y_true in zip(data, all_y_trues):
                # --- Do a feedforward (we'll need these values later)

                h1 = sigmoid(self.w1 * x[0] + self.w2 * x[1] + self.w3 * x[2] + self.b1)
                h2 = sigmoid(self.w4 * x[0] + self.w4 * x[1] + self.w6 * x[2] + self.b2)
                h3 = sigmoid(self.w7 * x[0] + self.w8 * x[1] + self.w9 * x[2] + self.b3)
                h4 = sigmoid(self.w10 * x[0] + self.w11 * x[1] + self.w12 * x[2] + self.b4)
                h5 = sigmoid(self.w13 * x[0] + self.w14 * x[1] + self.w15 * x[2] + self.b5)

                sum_h1 = self.w1 * x[0] + self.w2 * x[1] + self.w3 * x[2] + self.b1
                sum_h2 = self.w4 * x[0] + self.w4 * x[1] + self.w6 * x[2] + self.b2
                sum_h3 = self.w7 * x[0] + self.w8 * x[1] + self.w9 * x[2] + self.b3
                sum_h4 = self.w10 * x[0] + self.w11 * x[1] + self.w12 * x[2] + self.b4
                sum_h5 = self.w13 * x[0] + self.w14 * x[1] + self.w15 * x[2] + self.b5
                z = feedforward(self,x)

                hi1 = sigmoid(self.w16 * z[0] + self.w17 * z[1] + self.w18 * z[2] + self.w19 * z[3]+ self.w20 * z[4] + self.b6)
                hi2 = sigmoid(self.w21 * z[0] + self.w22 * z[1] + self.w23 * z[2] + self.w24 * z[3]+ self.w25 * z[4] + self.b7)
                hi3 = sigmoid(self.w26 * z[0] + self.w27 * z[1] + self.w28 * z[2] + self.w29 * z[3]+ self.w30 * z[4] + self.b8)
                hi4 = sigmoid(self.w31 * z[0] + self.w32 * z[1] + self.w33 * z[2] + self.w34 * z[3]+ self.w35 * z[4] + self.b9)
                hi5 = sigmoid(self.w36 * z[0] + self.w37 * z[1] + self.w38 * z[2] + self.w39 * z[3]+ self.w40 * z[4] + self.b10)
                sum_hi1 = self.w16 * z[0] + self.w17 * z[1] + self.w18 * z[2] + self.w19 * z[3]+ self.w20 * z[4] + self.b6
                sum_hi2 = self.w21 * z[0] + self.w22 * z[1] + self.w23 * z[2] + self.w24 * z[3]+ self.w25 * z[4] + self.b7
                sum_hi3 = self.w26 * z[0] + self.w27 * z[1] + self.w28 * z[2] + self.w29 * z[3]+ self.w30 * z[4] + self.b8
                sum_hi4 = self.w31 * z[0] + self.w32 * z[1] + self.w33 * z[2] + self.w34 * z[3]+ self.w35 * z[4] + self.b9
                sum_hi5 = self.w36 * z[0] + self.w37 * z[1] + self.w38 * z[2] + self.w39 * z[3]+ self.w40 * z[4] + self.b10
                o1 = feedforward(self,z)
                sum_o1 = self.w41 * hi1 + self.w42 * hi2 + self.w43 * hi3 + self.w44 * hi4 + self.w45 * hi5 + self.b11
                y_pred = o1
                # --- Calculate partial derivatives.
                # --- Naming: d_L_d_w1 represents "partial L / partial w1"
                d_L_d_ypred = -2 * (y_true - y_pred)

                # Neuron o1 NEW
                d_ypred_d_w41 = hi1 * deriv_sigmoid(sum_o1)
                d_ypred_d_w42 = hi2 * deriv_sigmoid(sum_o1)
                d_ypred_d_w43 = hi3 * deriv_sigmoid(sum_o1)
                d_ypred_d_w44 = hi4 * deriv_sigmoid(sum_o1)
                d_ypred_d_w45 = hi5 * deriv_sigmoid(sum_o1)
                d_ypred_d_b11 = deriv_sigmoid(sum_o1)

                d_ypred_d_hi1 = self.w41 * deriv_sigmoid(sum_o1)
                d_ypred_d_hi2 = self.w42 * deriv_sigmoid(sum_o1)
                d_ypred_d_hi3 = self.w43 * deriv_sigmoid(sum_o1)
                d_ypred_d_hi4 = self.w44 * deriv_sigmoid(sum_o1)
                d_ypred_d_hi5 = self.w45 * deriv_sigmoid(sum_o1)

                d_ypred_d_h1 = deriv_sigmoid(sum_o1) * (self.w41 * deriv_sigmoid(sum_hi1) * self.w16 + self.w42 * deriv_sigmoid(sum_hi2) * self.w21 + self.w43 * deriv_sigmoid(sum_hi3) * self.w26 + self.w44 * deriv_sigmoid(sum_hi4) * self.w31 + self.w45 * deriv_sigmoid(sum_hi5) * self.w36)
                d_ypred_d_h2 = deriv_sigmoid(sum_o1) * (self.w41 * deriv_sigmoid(sum_hi1) * self.w17 + self.w42 * deriv_sigmoid(sum_hi2) * self.w22 + self.w43 * deriv_sigmoid(sum_hi3) * self.w27 + self.w44 * deriv_sigmoid(sum_hi4) * self.w32 + self.w45 * deriv_sigmoid(sum_hi5) * self.w37)
                d_ypred_d_h3 = deriv_sigmoid(sum_o1) * (self.w41 * deriv_sigmoid(sum_hi1) * self.w18 + self.w42 * deriv_sigmoid(sum_hi2) * self.w23 + self.w43 * deriv_sigmoid(sum_hi3) * self.w28 + self.w44 * deriv_sigmoid(sum_hi4) * self.w33 + self.w45 * deriv_sigmoid(sum_hi5) * self.w38)
                d_ypred_d_h4 = deriv_sigmoid(sum_o1) * (self.w41 * deriv_sigmoid(sum_hi1) * self.w19 + self.w42 * deriv_sigmoid(sum_hi2) * self.w24 + self.w43 * deriv_sigmoid(sum_hi3) * self.w29 + self.w44 * deriv_sigmoid(sum_hi4) * self.w34 + self.w45 * deriv_sigmoid(sum_hi5) * self.w39)
                d_ypred_d_h5 = deriv_sigmoid(sum_o1) * (self.w41 * deriv_sigmoid(sum_hi1) * self.w20 + self.w42 * deriv_sigmoid(sum_hi2) * self.w25 + self.w43 * deriv_sigmoid(sum_hi3) * self.w30 + self.w44 * deriv_sigmoid(sum_hi4) * self.w35 + self.w45 * deriv_sigmoid(sum_hi5) * self.w40)

                # Neuron hi1 NEW
                d_hi1_d_w16 = h1 * deriv_sigmoid(sum_hi1)
                d_hi1_d_w17 = h2 * deriv_sigmoid(sum_hi1)
                d_hi1_d_w18 = h3 * deriv_sigmoid(sum_hi1)
                d_hi1_d_w19 = h4 * deriv_sigmoid(sum_hi1)
                d_hi1_d_w20 = h5 * deriv_sigmoid(sum_hi1)
                d_hi1_d_b6 = deriv_sigmoid(sum_hi1)

                # Neuron hi2 NEW
                d_hi2_d_w21 = h1 * deriv_sigmoid(sum_hi2)
                d_hi2_d_w22 = h2 * deriv_sigmoid(sum_hi2)
                d_hi2_d_w23 = h3 * deriv_sigmoid(sum_hi2)
                d_hi2_d_w24 = h4 * deriv_sigmoid(sum_hi2)
                d_hi2_d_w25 = h5 * deriv_sigmoid(sum_hi2)
                d_hi2_d_b7 = deriv_sigmoid(sum_hi2)

                # Neuron hi3 NEW
                d_hi3_d_w26 = h1 * deriv_sigmoid(sum_hi3)
                d_hi3_d_w27 = h2 * deriv_sigmoid(sum_hi3)
                d_hi3_d_w28 = h3 * deriv_sigmoid(sum_hi3)
                d_hi3_d_w29 = h4 * deriv_sigmoid(sum_hi3)
                d_hi3_d_w30 = h5 * deriv_sigmoid(sum_hi3)
                d_hi3_d_b8 = deriv_sigmoid(sum_hi3)

                # Neuron hi4 NEW
                d_hi4_d_w31 = h1 * deriv_sigmoid(sum_hi4)
                d_hi4_d_w32 = h2 * deriv_sigmoid(sum_hi4)
                d_hi4_d_w33 = h3 * deriv_sigmoid(sum_hi4)
                d_hi4_d_w34 = h4 * deriv_sigmoid(sum_hi4)
                d_hi4_d_w35 = h5 * deriv_sigmoid(sum_hi4)
                d_hi4_d_b9 = deriv_sigmoid(sum_hi4)

                #Neuron hi5 NEW
                d_hi5_d_w36 = h1 * deriv_sigmoid(sum_hi5)
                d_hi5_d_w37 = h2 * deriv_sigmoid(sum_hi5)
                d_hi5_d_w38 = h3 * deriv_sigmoid(sum_hi5)
                d_hi5_d_w39 = h4 * deriv_sigmoid(sum_hi5)
                d_hi5_d_w40 = h5 * deriv_sigmoid(sum_hi5)
                d_hi5_d_b10 = deriv_sigmoid(sum_hi5)

                #Neuron h1 NEW
                d_h1_d_w1 = x[0] * deriv_sigmoid(sum_h1)
                d_h1_d_w2 = x[1] * deriv_sigmoid(sum_h1)
                d_h1_d_w3 = x[2] * deriv_sigmoid(sum_h1)
                d_h1_d_b1 = deriv_sigmoid(sum_h1)

                #Neuron h2 NEW
                d_h2_d_w4 = x[0] * deriv_sigmoid(sum_h2)
                d_h2_d_w5 = x[1] * deriv_sigmoid(sum_h2)
                d_h2_d_w6 = x[2] * deriv_sigmoid(sum_h2)
                d_h2_d_b2 = deriv_sigmoid(sum_h2)

                #Neuron h3 NEW
                d_h3_d_w7 = x[0] * deriv_sigmoid(sum_h3)
                d_h3_d_w8 = x[1] * deriv_sigmoid(sum_h3)
                d_h3_d_w9 = x[2] * deriv_sigmoid(sum_h3)
                d_h3_d_b3 = deriv_sigmoid(sum_h3)

                #Neuron h4 NEW
                d_h4_d_w10 = x[0] * deriv_sigmoid(sum_h4)
                d_h4_d_w11 = x[1] * deriv_sigmoid(sum_h4)
                d_h4_d_w12 = x[2] * deriv_sigmoid(sum_h4)
                d_h4_d_b4 = deriv_sigmoid(sum_h4)

                #Neuron h5 NEW
                d_h5_d_w13 = x[0] * deriv_sigmoid(sum_h5)
                d_h5_d_w14 = x[1] * deriv_sigmoid(sum_h5)
                d_h5_d_w15 = x[2] * deriv_sigmoid(sum_h5)
                d_h5_d_b5 = deriv_sigmoid(sum_h5)

                # --- Update weights and biases
                # Neuron h1
                self.w1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w1
                self.w2 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w2
                self.w3 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w3
                self.b1 -= learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_b1

                # Neuron h2
                self.w4 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w4
                self.w5 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w5
                self.w6 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_w6
                self.b2 -= learn_rate * d_L_d_ypred * d_ypred_d_h2 * d_h2_d_b2

                # Neuron h3
                self.w7 -= learn_rate * d_L_d_ypred * d_ypred_d_h3 * d_h3_d_w7
                self.w8 -= learn_rate * d_L_d_ypred * d_ypred_d_h3 * d_h3_d_w8
                self.w9 -= learn_rate * d_L_d_ypred * d_ypred_d_h3 * d_h3_d_w9
                self.b3 -= learn_rate * d_L_d_ypred * d_ypred_d_h3 * d_h3_d_b3

                # Neuron h4
                self.w10 -= learn_rate * d_L_d_ypred * d_ypred_d_h4 * d_h4_d_w10
                self.w11 -= learn_rate * d_L_d_ypred * d_ypred_d_h4 * d_h4_d_w11
                self.w12 -= learn_rate * d_L_d_ypred * d_ypred_d_h4 * d_h4_d_w12
                self.b4 -= learn_rate * d_L_d_ypred * d_ypred_d_h4 * d_h4_d_b4

                # Neuron h5
                self.w13 -= learn_rate * d_L_d_ypred * d_ypred_d_h5 * d_h5_d_w13
                self.w14 -= learn_rate * d_L_d_ypred * d_ypred_d_h5 * d_h5_d_w14
                self.w15 -= learn_rate * d_L_d_ypred * d_ypred_d_h5 * d_h5_d_w15
                self.b5 -= learn_rate * d_L_d_ypred * d_ypred_d_h5 * d_h5_d_b5

                # Neuron hi1
                self.w16 -= learn_rate * d_L_d_ypred * d_ypred_d_hi1 * d_hi1_d_w16
                self.w17 -= learn_rate * d_L_d_ypred * d_ypred_d_hi1 * d_hi1_d_w17
                self.w18 -= learn_rate * d_L_d_ypred * d_ypred_d_hi1 * d_hi1_d_w18
                self.w19 -= learn_rate * d_L_d_ypred * d_ypred_d_hi1 * d_hi1_d_w19
                self.w20 -= learn_rate * d_L_d_ypred * d_ypred_d_hi1 * d_hi1_d_w20
                self.b6 -= learn_rate * d_L_d_ypred * d_ypred_d_hi1 * d_hi1_d_b6

                # Neuron hi2
                self.w21 -= learn_rate * d_L_d_ypred * d_ypred_d_hi2 * d_hi2_d_w21
                self.w22 -= learn_rate * d_L_d_ypred * d_ypred_d_hi2 * d_hi2_d_w22
                self.w23 -= learn_rate * d_L_d_ypred * d_ypred_d_hi1 * d_hi2_d_w23
                self.w24 -= learn_rate * d_L_d_ypred * d_ypred_d_hi2 * d_hi2_d_w24
                self.w25 -= learn_rate * d_L_d_ypred * d_ypred_d_hi2 * d_hi2_d_w25
                self.b7 -= learn_rate * d_L_d_ypred * d_ypred_d_hi2 * d_hi2_d_b7

                # Neuron hi3
                self.w26 -= learn_rate * d_L_d_ypred * d_ypred_d_hi3 * d_hi3_d_w26
                self.w27 -= learn_rate * d_L_d_ypred * d_ypred_d_hi3 * d_hi3_d_w27
                self.w28 -= learn_rate * d_L_d_ypred * d_ypred_d_hi3 * d_hi3_d_w28
                self.w29 -= learn_rate * d_L_d_ypred * d_ypred_d_hi3 * d_hi3_d_w29
                self.w30 -= learn_rate * d_L_d_ypred * d_ypred_d_hi3 * d_hi3_d_w30
                self.b8 -= learn_rate * d_L_d_ypred * d_ypred_d_hi3 * d_hi3_d_b8

                # Neuron hi4
                self.w31 -= learn_rate * d_L_d_ypred * d_ypred_d_hi4 * d_hi4_d_w31
                self.w32 -= learn_rate * d_L_d_ypred * d_ypred_d_hi4 * d_hi4_d_w32
                self.w33 -= learn_rate * d_L_d_ypred * d_ypred_d_hi4 * d_hi4_d_w33
                self.w34 -= learn_rate * d_L_d_ypred * d_ypred_d_hi4 * d_hi4_d_w34
                self.w35 -= learn_rate * d_L_d_ypred * d_ypred_d_hi4 * d_hi4_d_w35
                self.b9 -= learn_rate * d_L_d_ypred * d_ypred_d_hi4 * d_hi4_d_b9

                # Neuron hi5 NEW
                self.w36 -= learn_rate * d_L_d_ypred * d_ypred_d_hi5 * d_hi5_d_w36
                self.w37 -= learn_rate * d_L_d_ypred * d_ypred_d_hi5 * d_hi5_d_w37
                self.w38 -= learn_rate * d_L_d_ypred * d_ypred_d_hi5 * d_hi5_d_w38
                self.w39 -= learn_rate * d_L_d_ypred * d_ypred_d_hi5 * d_hi5_d_w39
                self.w40 -= learn_rate * d_L_d_ypred * d_ypred_d_hi5 * d_hi5_d_w40
                self.b10 -= learn_rate * d_L_d_ypred * d_ypred_d_hi5 * d_hi5_d_b10

                # Neuron o1 NEW
                self.w41 -= learn_rate * d_L_d_ypred * d_ypred_d_w41
                self.w42 -= learn_rate * d_L_d_ypred * d_ypred_d_w42
                self.w43 -= learn_rate * d_L_d_ypred * d_ypred_d_w43
                self.w44 -= learn_rate * d_L_d_ypred * d_ypred_d_w44
                self.w45 -= learn_rate * d_L_d_ypred * d_ypred_d_w45
                self.b11 -= learn_rate * d_L_d_ypred * d_ypred_d_b11

            # --- Calculate total loss at the end of each epoch
            if epoch % 10 == 0:
                firstlayer = np.apply_along_axis(self.feedforward, 1, data)
                y_preds = np.apply_along_axis(self.feedforward2, 1, firstlayer)
                loss = mse_loss(all_y_trues, y_preds)
                print("Epoch %d loss: %.3f" % (epoch, loss))

# Define dataset
data = np.array([
  [-2, -1, 16],  # Alice
  [25, 6, -5],   # Bob
  [17, 4, 20],   # Charlie
  [-15, -6 ,7], # Diana
])
all_y_trues = np.array([
  1, # Alice
  0, # Bob
  0, # Charlie
  1, # Diana
])

# Train our neural network!
network = OurNeuralNetwork()
network.train(data, all_y_trues)

ValueError Traceback (последний последний вызов) в 338 # Обучите нашу нейронную сеть! 339 network = OurNeuralNetwork () -> 340 network.train (data, all_y_trues)

в поезде (self, data, all_y_trues) 238 # --- Обновление весов и смещений 239 # Нейрон h1 -> 240self.w1 - = learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w1 241 self.w2 - = learn_rate * d_L_d_ypred * d_ypred_d_h1 * d_h1_d_w2 242 self.w3 - = многопользовательскийоперанд с формой (5,) не соответствует форме трансляции (5,5) В []:

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...