I'm having a problem with the dimensions in my deep neural network, which throws this error:
line 104, in linear_backward
dW = (dz @ a.T) * W * lambd / m
ValueError: operands could not be broadcast together with shapes (8,8) (8,32)
I think the problem is in the backpropagation, but I can't find my mistake. I apologize in advance for the messy code and would be very grateful for any help, as I'm a school student with no one around me to ask.
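In case it helps, here is a small shape trace I put together to see where the (8,8) and (8,32) in the error might be coming from. It assumes m = 426 training examples and the layer sizes [30, 64, 32, 8, 1] that I pass in at the bottom, so this is only my reading of the situation and may be wrong:

import numpy as np

m = 426                    # training examples after train_test_split
W3 = np.zeros((8, 32))     # parameters["W3"]: (layer_dims[3], layer_dims[2])
a3 = np.zeros((8, m))      # forward_cache["a3"]: activations of the 8-unit layer
dz3 = np.zeros((8, m))     # dz passed into linear_backward for that layer

print((dz3 @ a3.T).shape)  # (8, 8) -- the elementwise * with W3 (8, 32) then fails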
The program looks like this:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# Neural Network Build
def sigmoid(Z):
# activation function used to classify the output
# range of the function is between 0 and 1.
a = (1 + np.exp(-Z)) ** -1
return a
def relu(Z):
# activation function used in hidden layers
A = np.maximum(0, Z) # outputs Z if Z is positive, otherwise returns 0
assert (A.shape == Z.shape) #stops program if this isn't true
return A
def relu_back(dA, Z):
# function finding the derivative of the relu function
dZ = np.array(dA, copy=True)
dZ[Z <= 0] = 0
return dZ
def sigmoid_back(dA, Z):
# function finding the derivative of the sigmoid function
s = sigmoid(Z)
dZ = dA * s * (1 - s)
return dZ
def initialize_parameters(layer_dims):
parameters = {}
L = len(layer_dims)
for layer in range(1, L):
parameters["W" + str(layer)] = np.random.randn(layer_dims[layer], layer_dims[layer - 1]) * np.sqrt(
2 / layer_dims[layer - 1])
parameters["b" + str(layer)] = np.zeros((layer_dims[layer], 1))
print(parameters["W" + str(layer)].shape)
assert (parameters['W' + str(layer)].shape == (layer_dims[layer], layer_dims[layer - 1]))
assert (parameters['b' + str(layer)].shape == (layer_dims[layer], 1))
return parameters
def forward_prop(X, parameters):
L = len(parameters) // 2
forward_cache = {}
forward_cache["z1"] = parameters["W1"] @ X + parameters["b1"]
forward_cache["a1"] = relu(forward_cache["z1"])
for layer in range(2, L):
forward_cache["z" + str(layer)] = parameters["W" + str(layer)] @ forward_cache["a" + str(layer - 1)] + \
parameters["b" + str(layer)]
forward_cache["a" + str(layer)] = relu(forward_cache["z" + str(layer)])
# Output neuron will have sigmoid activation function applied to classify the output according to a probability.
# This needs to be handled separately.
forward_cache["z" + str(L)] = parameters["W" + str(L)] @ forward_cache["a" + str(L - 1)] + parameters["b" + str(L)]
forward_cache["a" + str(L)] = sigmoid(forward_cache["z" + str(L)])
AL = forward_cache["a" + str(L)]
assert (AL.shape == (1, X.shape[1]))
return AL, forward_cache # Return y_hat (the model's prediction) and forward_cache (used in back-prop)
def compute_loss(A, Y, parameters, lambd):
L = len(parameters) // 2
m = Y.shape[1]
log_function = -(np.multiply(Y, np.log(A)) + np.multiply((1 - Y), np.log(1 - A)))
L2_regularisation_cost = 0
for weight in range(1, L):
L2_regularisation_cost += np.sum(np.square(parameters["W" + str(weight)]))
loss = 1. / m * (np.nansum(log_function) + (L2_regularisation_cost * lambd / 2))
loss = np.squeeze(loss)
assert (loss.shape == ())
return loss
def linear_backward(dz, W, b, a, lambd):
m = a.shape[1]
dW = (dz @ a.T) * W * lambd / m
db = np.sum(dz, axis=1, keepdims=True)
dA_prev = W.T @ dz
print("W:", W.shape,"dZ:", dz.shape)
print(dA_prev.shape, a.shape)
# assert (dA_prev.shape == a.shape)
assert (dW.shape == W.shape)
assert (db.shape == b.shape)
return dW, db, dA_prev
def activation_backward(dA, a, z, W, b, activation_function, lambd):
if activation_function == "sigmoid":
dz = sigmoid_back(dA, z)
elif activation_function == "relu":
dz = relu_back(dA, z)
dW, db, dA_prev = linear_backward(dz, W, b, a, lambd)
return dW, db, dA_prev
def backward_prop(AL, Y, cache, parameters, lambd):
L = len(parameters) // 2
print(AL)
gradients = {}
dAL = -(np.divide(Y, AL)) + np.divide(1 - Y, 1 - AL)
dz = sigmoid_back(dAL, cache["z" + str(L)])
gradients["dW" + str(L)], gradients["db" + str(L)], dA_prev = \
linear_backward(dz, parameters["W" + str(L)], parameters["b" + str(L)], cache["a" + str(L)], lambd)
for l in reversed(range(1, L)):
gradients["dW" + str(l)], gradients["db" + str(l)], dA_prev = \
activation_backward(dA_prev, cache["a" + str(l)], cache["z" + str(l)],
parameters["W" + str(l)], parameters["b" + str(l)],
"relu", lambd)
return gradients
def update_parameters(parameters, gradients, learning_rate):
L = len(parameters) // 2
for l in range(1, L + 1):
parameters["W" + str(l)] = parameters["W" + str(l)] - (gradients["dW" + str(l)] * learning_rate)
parameters["b" + str(l)] = parameters["b" + str(l)] - (gradients["db" + str(l)] * learning_rate)
return parameters
def predict(X, Y, parameters):
"""
This function is used to predict the results of an L-layer neural network.
Arguments:
X -- data set of examples you would like to label
parameters -- parameters of the trained model
Returns:
p -- predictions for the given dataset X
"""
m = X.shape[1]
# n = len(parameters) // 2 number of layers in the neural network
binary_outcome = np.zeros((1, m))
# Forward propagation
probabilities = deep_neural_network(X, Y, parameters, )
# convert probabilities to 0/1 predictions
for i in range(0, probabilities.shape[1]):
if probabilities[0, i] > 0.5:
binary_outcome[0, i] = 1
else:
binary_outcome[0, i] = 0
# print results
# print ("predictions: " + str(p))
# print ("true labels: " + str(y))
print("Accuracy: " + str(np.sum((binary_outcome == Y) / m)))
return binary_outcome
def deep_neural_network(X, Y, layer_dims, number_of_iterations, learning_rate, print_cost, lambd):
losses = []
for epoch in range(number_of_iterations):
parameters = initialize_parameters(layer_dims)
AL, forward_cache = forward_prop(X, parameters)
loss = compute_loss(AL, Y, parameters, lambd)
losses.append(loss)
gradients = backward_prop(AL, Y, forward_cache, parameters, lambd)
parameters = update_parameters(parameters, gradients, learning_rate)
if (epoch % 100 == 0) and print_cost:
print(loss)
return parameters
return
cancer = load_breast_cancer()
data = cancer.data
labels = cancer.target
xtrain, xtest, ytrain, ytest = train_test_split(data, labels)
xtrain = xtrain.T
ytrain = ytrain.reshape((1, 426))
parameters, costs = deep_neural_network(xtrain, ytrain, [30, 64, 32, 8, 1], learning_rate=0.045,
number_of_iterations=2000, print_cost=True, lambd=0.01)
plt.plot(np.squeeze(costs))
plt.ylabel('cost')
plt.xlabel('iterations (per tens)')
plt.title("Learning rate =" + str(0.075))
plt.show()
predictions = predict(X=xtrain, parameters=parameters)