I'm having a problem with the dimensions in my deep neural network, which throws this error:
line 104, in linear_backward
dW = (dz @ a.T) * W * lambd / m
ValueError: operands could not be broadcast together with shapes (8,8) (8,32)
I think the problem is in the backpropagation, but I can't find my mistake. I apologize in advance for the messy code and would be very grateful for any help, as I'm a school student with no one around me to ask.
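In case it helps, here is a small shape trace I put together to see where the (8,8) and (8,32) in the error might be coming from. It assumes m = 426 training examples and the layer sizes [30, 64, 32, 8, 1] that I pass in at the bottom, so this is only my reading of the situation and may be wrong:

import numpy as np

m = 426                    # training examples after train_test_split
W3 = np.zeros((8, 32))     # parameters["W3"]: (layer_dims[3], layer_dims[2])
a3 = np.zeros((8, m))      # forward_cache["a3"]: activations of the 8-unit layer
dz3 = np.zeros((8, m))     # dz passed into linear_backward for that layer

print((dz3 @ a3.T).shape)  # (8, 8) -- the elementwise * with W3 (8, 32) then fails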
The program looks like this:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# Neural Network Build
def sigmoid(Z):
# activation function used to classify the output
# range of the function is between 0 and 1.
a = (1 + np.exp(-Z)) ** -1
return a
def relu(Z):
# activation function used in hidden layers
A = np.maximum(0, Z) # outputs Z if Z is positive, otherwise returns 0
assert (A.shape == Z.shape) #stops program if this isn't true
return A
def relu_back(dA, Z):
# function finding the derivative of the relu function
dZ = np.array(dA, copy=True)
dZ[Z <= 0] = 0
return dZ
def sigmoid_back(dA, Z):
# function finding the derivative of the sigmoid function
s = sigmoid(Z)
dZ = dA * s * (1 - s)
return dZ
def initialize_parameters(layer_dims):
parameters = {}
L = len(layer_dims)
for layer in range(1, L):
parameters["W" + str(layer)] = np.random.randn(layer_dims[layer], layer_dims[layer - 1]) * np.sqrt(
2 / layer_dims[layer - 1])
parameters["b" + str(layer)] = np.zeros((layer_dims[layer], 1))
print(parameters["W" + str(layer)].shape)
assert (parameters['W' + str(layer)].shape == (layer_dims[layer], layer_dims[layer - 1]))
assert (parameters['b' + str(layer)].shape == (layer_dims[layer], 1))
return parameters
def forward_prop(X, parameters):
L = len(parameters) // 2
forward_cache = {}
forward_cache["z1"] = parameters["W1"] @ X + parameters["b1"]
forward_cache["a1"] = relu(forward_cache["z1"])
for layer in range(2, L):
forward_cache["z" + str(layer)] = parameters["W" + str(layer)] @ forward_cache["a" + str(layer - 1)] + \
parameters["b" + str(layer)]
forward_cache["a" + str(layer)] = relu(forward_cache["z" + str(layer)])
# Output neuron will have sigmoid activation function applied to classify the output according to a probability.
# This needs to be handled separately.
forward_cache["z" + str(L)] = parameters["W" + str(L)] @ forward_cache["a" + str(L - 1)] + parameters["b" + str(L)]
forward_cache["a" + str(L)] = sigmoid(forward_cache["z" + str(L)])
AL = forward_cache["a" + str(L)]
assert (AL.shape == (1, X.shape[1]))
return AL, forward_cache # Return y_hat (the model's prediction) and forward_cache (used in back-prop)
def compute_loss(A, Y, parameters, lambd):
L = len(parameters) // 2
m = Y.shape[1]
log_function = -(np.multiply(Y, np.log(A)) + np.multiply((1 - Y), np.log(1 - A)))
L2_regularisation_cost = 0
for weight in range(1, L):
L2_regularisation_cost += np.sum(np.square(parameters["W" + str(weight)]))
loss = 1. / m * (np.nansum(log_function) + (L2_regularisation_cost * lambd / 2))
loss = np.squeeze(loss)
assert (loss.shape == ())
return loss
def linear_backward(dz, W, b, a, lambd):
m = a.shape[1]
dW = (dz @ a.T) * W * lambd / m
db = np.sum(dz, axis=1, keepdims=True)
dA_prev = W.T @ dz
print("W:", W.shape,"dZ:", dz.shape)
print(dA_prev.shape, a.shape)
# assert (dA_prev.shape == a.shape)
assert (dW.shape == W.shape)
assert (db.shape == b.shape)
return dW, db, dA_prev
def activation_backward(dA, a, z, W, b, activation_function, lambd):
if activation_function == "sigmoid":
dz = sigmoid_back(dA, z)
elif activation_function == "relu":
dz = relu_back(dA, z)
dW, db, dA_prev = linear_backward(dz, W, b, a, lambd)
return dW, db, dA_prev
def backward_prop(AL, Y, cache, parameters, lambd):
L = len(parameters) // 2
print(AL)
gradients = {}
dAL = -(np.divide(Y, AL)) + np.divide(1 - Y, 1 - AL)
dz = sigmoid_back(dAL, cache["z" + str(L)])
gradients["dW" + str(L)], gradients["db" + str(L)], dA_prev = \
linear_backward(dz, parameters["W" + str(L)], parameters["b" + str(L)], cache["a" + str(L)], lambd)
for l in reversed(range(1, L)):
gradients["dW" + str(l)], gradients["db" + str(l)], dA_prev = \
activation_backward(dA_prev, cache["a" + str(l)], cache["z" + str(l)],
parameters["W" + str(l)], parameters["b" + str(l)],
"relu", lambd)
return gradients
def update_parameters(parameters, gradients, learning_rate):
L = len(parameters) // 2
for l in range(1, L + 1):
parameters["W" + str(l)] = parameters["W" + str(l)] - (gradients["dW" + str(l)] * learning_rate)
parameters["b" + str(l)] = parameters["b" + str(l)] - (gradients["db" + str(l)] * learning_rate)
return parameters
def predict(X, Y, parameters):
"""
This function is used to predict the results of an L-layer neural network.
Arguments:
X -- data set of examples you would like to label
parameters -- parameters of the trained model
Returns:
p -- predictions for the given dataset X
"""
m = X.shape[1]
# n = len(parameters) // 2 number of layers in the neural network
binary_outcome = np.zeros((1, m))
# Forward propagation
probabilities = deep_neural_network(X, Y, parameters, )
# convert probabilities to 0/1 predictions
for i in range(0, probabilities.shape[1]):
if probabilities[0, i] > 0.5:
binary_outcome[0, i] = 1
else:
binary_outcome[0, i] = 0
# print results
# print ("predictions: " + str(p))
# print ("true labels: " + str(y))
print("Accuracy: " + str(np.sum((binary_outcome == Y) / m)))
return binary_outcome
def deep_neural_network(X, Y, layer_dims, number_of_iterations, learning_rate, print_cost, lambd):
losses = []
for epoch in range(number_of_iterations):
parameters = initialize_parameters(layer_dims)
AL, forward_cache = forward_prop(X, parameters)
loss = compute_loss(AL, Y, parameters, lambd)
losses.append(loss)
gradients = backward_prop(AL, Y, forward_cache, parameters, lambd)
parameters = update_parameters(parameters, gradients, learning_rate)
if (epoch % 100 == 0) and print_cost:
print(loss)
return parameters
return
cancer = load_breast_cancer()
data = cancer.data
labels = cancer.target
xtrain, xtest, ytrain, ytest = train_test_split(data, labels)
xtrain = xtrain.T
ytrain = ytrain.reshape((1, 426))
parameters, costs = deep_neural_network(xtrain, ytrain, [30, 64, 32, 8, 1], learning_rate=0.045,
number_of_iterations=2000, print_cost=True, lambd=0.01)
plt.plot(np.squeeze(costs))
plt.ylabel('cost')
plt.xlabel('iterations (per tens)')
plt.title("Learning rate =" + str(0.075))
plt.show()
predictions = predict(X=xtrain, parameters=parameters)