The problem must lie in the transposing and in taking the dot product during the backpropagation step.
My XOR code:
import numpy as np

def sigmoid(z):
    return 1/(1 + np.exp(-z))

def sigmoid_derivative(z):
    return np.multiply(sigmoid(z), sigmoid(1.0 - z))

def init_w(epsilon):
    # Input nodes
    theta1 = 2*np.random.random([2, 3])*epsilon - epsilon
    # Output nodes
    theta2 = 2*np.random.random([1, 3])*epsilon - epsilon
    theta1, theta2 = np.mat(theta1), np.mat(theta2)
    return theta1, theta2

def fit(X, Y, theta1, theta2, predict=False, x=None):
    grad1, grad2 = np.mat(np.zeros(np.shape(theta1))), np.mat(np.zeros(np.shape(theta2)))
    for i in range(len(X)):
        x = x if predict else X[i]
        y = Y[0, i]
        # forward propagate
        a = x
        a1 = np.mat(np.append(1, a)).T
        z2 = theta1*a1
        a2 = sigmoid(z2)
        a2 = np.mat(np.append(1, a2)).T
        z3 = theta2*a2
        a3 = sigmoid(z3)
        if predict: return a3
        # back propagate
        delta3 = a3 - y.T
        grad2 += delta3 * a2.T
        delta2 = np.multiply(theta2.T*delta3, sigmoid_derivative(a2))
        grad1 += (delta2[1:] * a1.T)
    return grad1, grad2
def predict(x):
    return fit(X, Y, theta1, theta2, True, x)

X = np.mat([[0,0],
            [0,1],
            [1,0],
            [1,1]])
Y = np.mat([0,1,1,0])
epochs = 10000
alpha = 0.85
epsilon = 1

theta1, theta2 = init_w(epsilon)
for i in range(epochs):
    g1, g2 = fit(X, Y, theta1, theta2)
    theta1 -= alpha * g1
    theta2 -= alpha * g2

for i in range(len(X)):
    x = X[i]
    guess = predict(x)
    print(x, ":", guess)
Output:
[[0 0]] : [[ 0.00233143]]
[[0 1]] : [[ 0.99775431]]
[[1 0]] : [[ 0.9977526]]
[[1 1]] : [[ 0.00233134]]
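Since the original concern is the transposing and the dot products in the backward pass, a quick sanity check is to run a single forward/backward step and assert that each gradient has the same shape as the weight matrix it updates. Below is a minimal sketch of that check, using the same 2x3 / 1x3 theta layout as the snippet above; note that it applies the usual a*(1-a) form of the sigmoid derivative to the activations, which is my assumption rather than a copy of the sigmoid_derivative above.

import numpy as np

theta1 = np.mat(np.random.random([2, 3]))        # hidden weights, bias column included
theta2 = np.mat(np.random.random([1, 3]))        # output weights, bias column included

x, y = np.mat([0, 1]), 1
a1 = np.mat(np.append(1, x)).T                   # (3, 1) input plus bias
z2 = theta1 * a1                                 # (2, 3) x (3, 1) -> (2, 1)
a2 = np.mat(np.append(1, 1/(1 + np.exp(-z2)))).T # (3, 1) hidden activations plus bias
a3 = 1/(1 + np.exp(-(theta2 * a2)))              # (1, 3) x (3, 1) -> (1, 1) output

delta3 = a3 - y                                  # (1, 1) output error
grad2 = delta3 * a2.T                            # (1, 1) x (1, 3) -> (1, 3), matches theta2
delta2 = np.multiply(theta2.T * delta3, np.multiply(a2, 1 - a2))  # (3, 1) hidden error
grad1 = delta2[1:] * a1.T                        # (2, 1) x (1, 3) -> (2, 3), matches theta1

assert grad2.shape == theta2.shape and grad1.shape == theta1.shape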
Edit:
Your array format is too convoluted, so I suggest writing down the shapes after each step so that you can debug easily.
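As a minimal sketch of that idea, a throwaway helper (shp is just an illustrative name) can print the shape of whatever you pass it after each step of the forward and backward pass:

import numpy as np

def shp(**tensors):
    # print the shape of every named array/matrix passed in
    for name, t in tensors.items():
        print(name, np.shape(t))

# e.g. drop this after the backprop lines inside fit():
# shp(a1=a1, z2=z2, a2=a2, a3=a3, delta3=delta3, delta2=delta2, grad1=grad1, grad2=grad2)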
Update:
import numpy as np

#np.random.seed(0)

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

#Input datasets
inputs = np.array([[0,0],[0,1],[1,0],[1,1]])
expected_output = np.array([[0],[1],[1],[0]])

epochs = 10000
lr = 0.1
inputLayerNeurons, hiddenLayerNeurons, outputLayerNeurons = 2, 2, 1

#Random weights and bias initialization
#hidden_weights = np.random.uniform(size=(inputLayerNeurons,hiddenLayerNeurons))
#hidden_bias = np.random.uniform(size=(1,hiddenLayerNeurons))
#output_weights = np.random.uniform(size=(hiddenLayerNeurons,outputLayerNeurons))
#output_bias = np.random.uniform(size=(1,outputLayerNeurons))
hidden_weights = np.array([
    [0.2, 0.3],
    [0.4, 0.5]
])
hidden_bias = np.array([[0.3, 0.6]])
output_weights = np.array([[0.6], [0.7]])
output_bias = np.array([[0.5]])

print("Initial hidden weights: ", end='')
print(*hidden_weights)
print("Initial hidden biases: ", end='')
print(*hidden_bias)
print("Initial output weights: ", end='')
print(*output_weights)
print("Initial output biases: ", end='')
print(*output_bias)

#Training algorithm
for _ in range(epochs):
    #Forward Propagation
    hidden_layer_activation = np.dot(inputs, hidden_weights)
    hidden_layer_activation += hidden_bias
    hidden_layer_output = sigmoid(hidden_layer_activation)

    output_layer_activation = np.dot(hidden_layer_output, output_weights)
    output_layer_activation += output_bias
    predicted_output = sigmoid(output_layer_activation)

    #Backpropagation
    error = expected_output - predicted_output
    d_predicted_output = error * sigmoid_derivative(predicted_output)

    error_hidden_layer = d_predicted_output.dot(output_weights.T)
    d_hidden_layer = error_hidden_layer * sigmoid_derivative(hidden_layer_output)

    #Updating Weights and Biases
    output_weights += hidden_layer_output.T.dot(d_predicted_output) * lr
    output_bias += np.sum(d_predicted_output, axis=0, keepdims=True) * lr
    hidden_weights += inputs.T.dot(d_hidden_layer) * lr
    hidden_bias += np.sum(d_hidden_layer, axis=0, keepdims=True) * lr

print("Final hidden weights: ", end='')
print(*hidden_weights)
print("Final hidden bias: ", end='')
print(*hidden_bias)
print("Final output weights: ", end='')
print(*output_weights)
print("Final output bias: ", end='')
print(*output_bias)

print("\nOutput from neural network after 10,000 epochs: ", end='')
print(*predicted_output)

test = np.array([
    [0, 1]
])
hidden_layer_activation = np.dot(test, hidden_weights)
hidden_layer_activation += hidden_bias
hidden_layer_output = sigmoid(hidden_layer_activation)

output_layer_activation = np.dot(hidden_layer_output, output_weights)
output_layer_activation += output_bias
predicted_output = sigmoid(output_layer_activation)
print(predicted_output)
Final hidden weights: [3.59882402 5.68799788] [3.60260363 5.70714658]
Final hidden bias: [-5.50709978 -2.3415549 ]
Final output weights: [-7.85976304] [7.26409199]
Final output bias: [-3.26766959]
Output from neural network after 10,000 epochs: [0.06525552] [0.93906737] [0.93899963] [0.06635071]
[[0.93907536]]
Here is the result:
[[0.93907536]]
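If you want to check all four XOR cases rather than just [0, 1], the same two forward-pass steps can be run on the whole input matrix at once. This is a small sketch that assumes it is appended to the end of the script above, so it reuses sigmoid and the trained hidden_weights, hidden_bias, output_weights and output_bias:

# evaluate the trained network on every XOR input in one batch
all_inputs = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
hidden = sigmoid(np.dot(all_inputs, hidden_weights) + hidden_bias)
preds = sigmoid(np.dot(hidden, output_weights) + output_bias)
for row, p in zip(all_inputs, preds):
    print(row, "->", p)   # expect roughly 0, 1, 1, 0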