# Import libraries
import random
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm, trange
from sklearn.metrics import accuracy_score, mean_squared_error, log_loss
class FFNN_XOR:
    """Feed-forward network for XOR: 2 inputs, one hidden layer with 2
    sigmoid units, and a single sigmoid output, trained with batch
    gradient descent on the squared error.

    Scalar parameters (no matrices):
        hidden unit 1: a1 = w1*x1 + w2*x2 + b1
        hidden unit 2: a2 = w3*x1 + w4*x2 + b2
        output unit:   a3 = w5*h1 + w6*h2 + b3

    Per-epoch weight snapshots requested via ``display_weight`` are kept
    on ``self.weight_matrices`` (previously this relied on a
    caller-defined global, which raised NameError for other callers).
    """

    def __init__(self):
        # Biases are random here; fit(initialise=True) re-draws the
        # weights with zero biases (original behaviour preserved).
        self._initialise_parameters(zero_biases=False)

    def _initialise_parameters(self, zero_biases):
        """(Re)draw w1..w6 from N(0,1) under a fixed seed.

        Biases b1..b3 are drawn from N(0,1) as well, or set to 0 when
        ``zero_biases`` is True (the convention used by ``fit``).
        The fixed seed makes every run reproducible.
        """
        np.random.seed(0)
        self.w1 = np.random.randn()
        self.w2 = np.random.randn()
        self.w3 = np.random.randn()
        self.w4 = np.random.randn()
        self.w5 = np.random.randn()
        self.w6 = np.random.randn()
        if zero_biases:
            self.b1 = 0
            self.b2 = 0
            self.b3 = 0
        else:
            self.b1 = np.random.randn()
            self.b2 = np.random.randn()
            self.b3 = np.random.randn()

    def sigmoid(self, x):
        """Logistic function 1 / (1 + e^{-x})."""
        return 1.0 / (1.0 + np.exp(-x))

    def forward_pass(self, x):
        """Run one forward pass for a single sample ``x = (x1, x2)``.

        Caches all intermediate activations on ``self`` because
        ``grad`` reads them back; returns the output activation h3.
        """
        self.x1, self.x2 = x
        self.a1 = self.w1 * self.x1 + self.w2 * self.x2 + self.b1
        self.h1 = self.sigmoid(self.a1)
        self.a2 = self.w3 * self.x1 + self.w4 * self.x2 + self.b2
        self.h2 = self.sigmoid(self.a2)
        self.a3 = self.w5 * self.h1 + self.w6 * self.h2 + self.b3
        self.h3 = self.sigmoid(self.a3)
        return self.h3

    def grad(self, x, y):
        """Compute dL/dw, dL/db for one sample and store them on ``self``.

        Uses the squared error L = 0.5 * (h3 - y)^2 (the constant factor
        of the MSE derivative is absorbed into the learning rate).
        """
        self.forward_pass(x)
        # Output-layer delta: dL/da3 = (h3 - y) * sigmoid'(a3).
        d_out = (self.h3 - y) * self.h3 * (1 - self.h3)
        self.dw5 = d_out * self.h1
        self.dw6 = d_out * self.h2
        self.db3 = d_out
        # Hidden-layer deltas, back-propagated through w5 and w6.
        d_h1 = d_out * self.w5 * self.h1 * (1 - self.h1)
        d_h2 = d_out * self.w6 * self.h2 * (1 - self.h2)
        self.dw1 = d_h1 * self.x1
        self.dw2 = d_h1 * self.x2
        self.db1 = d_h1
        self.dw3 = d_h2 * self.x1
        self.dw4 = d_h2 * self.x2
        self.db2 = d_h2

    def fit(self, X, Y, epochs=1, learning_rate=0.1, initialise=True,
            display_loss=False, display_weight=False):
        """Train with full-batch gradient descent.

        Parameters
        ----------
        X : array of shape (m, 2); Y : array of shape (m,).
        epochs, learning_rate : training schedule.
        initialise : re-draw weights (with zero biases) before training.
        display_loss : track per-epoch MSE and plot it afterwards.
        display_weight : snapshot the weights each epoch into
            ``self.weight_matrices``.
        """
        if initialise:
            self._initialise_parameters(zero_biases=True)
        self.weight_matrices = []
        loss = {}
        # Progress bar is optional: fall back to a plain range when tqdm
        # is not installed so training still works headlessly.
        try:
            from tqdm.auto import tqdm as _tqdm
            epoch_iter = _tqdm(range(epochs), total=epochs, unit="epoch")
        except ImportError:
            epoch_iter = range(epochs)
        m = X.shape[0]
        for i in epoch_iter:
            dw1 = dw2 = dw3 = dw4 = dw5 = dw6 = db1 = db2 = db3 = 0
            for x, y in zip(X, Y):
                self.grad(x, y)
                dw1 += self.dw1
                dw2 += self.dw2
                dw3 += self.dw3
                dw4 += self.dw4
                dw5 += self.dw5
                dw6 += self.dw6
                db1 += self.db1
                db2 += self.db2
                db3 += self.db3
            # Average the accumulated gradients over the m samples.
            self.w1 -= learning_rate * dw1 / m
            self.w2 -= learning_rate * dw2 / m
            self.w3 -= learning_rate * dw3 / m
            self.w4 -= learning_rate * dw4 / m
            self.w5 -= learning_rate * dw5 / m
            self.w6 -= learning_rate * dw6 / m
            self.b1 -= learning_rate * db1 / m
            self.b2 -= learning_rate * db2 / m
            self.b3 -= learning_rate * db3 / m
            if display_loss:
                Y_pred = self.predict(X)
                # sklearn convention is (y_true, y_pred).
                loss[i] = mean_squared_error(Y, Y_pred)
            if display_weight:
                snapshot = np.array(
                    [[0, self.b3, self.w5, self.w6, 0, 0],
                     [self.b1, self.w1, self.w2, self.b2, self.w3, self.w4]])
                self.weight_matrices.append(snapshot)
        if display_loss:
            plt.plot(list(map(float, loss.values())))
            plt.xlabel('Epochs')
            plt.ylabel('Mean Squared Error')
            plt.show()

    def predict(self, X):
        """Return the network output for every row of X as a 1-D array."""
        return np.array([self.forward_pass(x) for x in X])
# Driver: build the 4-sample XOR truth table and train the network on it.
X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])
Y = np.array([0, 1, 1, 0])
print(X)
print(Y)

weight_matrices = []  # per-epoch weight snapshots (populated by fit)
ffn = FFNN_XOR()
ffn.fit(X, Y, epochs=2000, learning_rate=5,
        display_loss=True, display_weight=True)

result = ffn.predict(X)
# A bare `result` expression only echoes in a notebook; print explicitly
# so the predictions are also visible when run as a plain script.
print("Predicted:", result)
print("Actual : ", Y)
OUTPUT: array([0.0056347, 0.49994107, 0.99279204, 0.50012799]) Actual: [0 1 1 0] В следующем коде я реализовал задачу классификации XOR с использованием глубокой (deep) нейронной сети. Сеть содержит 1 скрытый слой с 2 нейронами. Но код не даёт правильных результатов. Может кто-нибудь помочь мне найти и исправить проблему?
Рис. 1: структура нейронной сети. Рис. 2: график ошибки (MSE) по эпохам.