I have tried normalization, different activation functions, cost functions, and various weight initialization methods, and the network still refuses to converge. I really don't know what else to do. If anyone has the time and is willing to help, here is the code:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
class Layer():
    # Activation functions and their derivatives
    @staticmethod
    def sigmoid(x):
        return np.where(x >= 0, 1. / (1. + np.exp(-x)), np.exp(x) / (1. + np.exp(x)))

    @staticmethod
    def dsigmoid(x):
        return Layer.sigmoid(x) * (1. - Layer.sigmoid(x))

    @staticmethod
    def softmax(x):
        exp = np.exp(x - np.max(x))
        return exp / np.sum(exp)

    @staticmethod
    def dsoftmax(x):
        return Layer.softmax(x) * (1. - Layer.softmax(x))

    @staticmethod
    def relu(x):
        return np.where(x >= 0, x, 0)

    @staticmethod
    def drelu(x):
        return np.where(x >= 0, 1, 0)

    @staticmethod
    def leaky_relu(x):
        return np.where(x >= 0, x, 0.01 * x)

    @staticmethod
    def dleaky_relu(x):
        return np.where(x >= 0, 1, 0.01)

    functions = {
        'sigmoid': sigmoid.__func__,
        'softmax': softmax.__func__,
        'relu': relu.__func__,
        'leaky_relu': leaky_relu.__func__,
        'input': np.nan
    }
    dfunctions = {
        'sigmoid': dsigmoid.__func__,
        'softmax': dsoftmax.__func__,
        'relu': drelu.__func__,
        'leaky_relu': dleaky_relu.__func__,
        'input': np.nan
    }
    # Layer init; weights and the remaining attributes are assigned later
    def __init__(self, n_nodes, activation='sigmoid'):
        self.n_nodes = n_nodes
        self.activation = Layer.functions[activation]
        self.dactivation = Layer.dfunctions[activation]
class NeuralNetwork():
    # Cost functions and their derivatives
    @staticmethod
    def quadratic(y, pred):
        return np.sum((y - pred) ** 2) * 0.5

    @staticmethod
    def dquadratic(y, pred):
        return pred - y

    @staticmethod
    def cross_entropy(y, pred):
        return -1. * np.dot(y, np.log(pred))

    @staticmethod
    def dcross_entropy(y, pred):
        return pred - y

    costs = {
        'quadratic': quadratic.__func__,
        'log_loss': cross_entropy.__func__,
        'cross_entropy': cross_entropy.__func__
    }
    dcosts = {
        'quadratic': dquadratic.__func__,
        'log_loss': dcross_entropy.__func__,
        'cross_entropy': dcross_entropy.__func__
    }

    def __init__(self, cost, lr=0.03, epochs=10, verbose=True):
        self.lr = lr
        self.epochs = epochs
        self.verbose = verbose
        self.cost = NeuralNetwork.costs[cost]
        self.dcost = NeuralNetwork.dcosts[cost]

    def fit(self, X, y, layers):
        self.X = X
        self.y = y
        # Input layer
        self.layers = []
        self.layers.append(Layer(self.X[0].shape[0], 'input'))
        # Initialize layers and weights
        for index, layer in enumerate(layers):
            layer.n_prev = self.layers[index].n_nodes
            layer.weights = np.random.rand(layer.n_nodes, layer.n_prev) * np.sqrt(2. / layer.n_nodes+layer.n_prev)
            self.layers.append(layer)
        for epoch in range(self.epochs):
            if self.verbose == True:
                print(f'Epoch: {epoch+1}')
            epoch_cost = 0
            for X, y in tqdm(zip(self.X, self.y), total=self.X.shape[0]):
                self.layers[0].a = X
                # Propagation
                for layer in self.layers[1:]:
                    layer.z = np.dot(layer.weights, self.layers[self.layers.index(layer)-1].a)
                    try:
                        layer.a = layer.activation(layer.z)
                    except RuntimeWarning:
                        print(layer.z)
                # Errors
                self.layers[-1].a[self.layers[-1].a == 0] = 10 ** -10
                epoch_cost += self.cost(y, self.layers[-1].a)
                self.layers[-1].error = np.multiply(self.dcost(y, self.layers[-1].a),
                                                    self.layers[-1].dactivation(self.layers[-1].z))
                #print(self.layers[-1].error)
                for layer in reversed(self.layers[1:-1]):
                    layer.error = (np.dot(np.transpose(self.layers[self.layers.index(layer)+1].weights),
                                          self.layers[self.layers.index(layer)+1].error)) * layer.dactivation(layer.z)
                # Gradients and update weights
                for layer in self.layers[1:]:
                    layer.gradients = np.dot(layer.error.reshape(-1, 1),
                                             self.layers[self.layers.index(layer)-1].a.reshape(1, -1))
                    layer.weights -= self.lr * layer.gradients
            # ADD METRICS
            if self.verbose == True:
                print(f'Cost: {(epoch_cost / self.y.shape[0]).round(4)}')

    def predict(self, X):
        self.X_test = X
        predictions = []
        for X in self.X_test:
            # Input
            self.layers[0].a = X
            # Propagation
            for layer in self.layers[1:]:
                layer.z = np.dot(layer.weights, self.layers[self.layers.index(layer)-1].a)
                layer.a = layer.activation(layer.z)
            predictions.append(np.where(self.layers[-1].a > 0.5, 1, 0))
        return predictions
X, y = fetch_openml('mnist_784', version=1, return_X_y=True)
y_classes = np.copy(y)
y = np.array(pd.get_dummies(y))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
#Normalize
X_train, X_test = X_train / 255., X_test / 255.
nn = NeuralNetwork(lr=0.3, epochs=10, cost='cross_entropy')
layers = [
    Layer(200, 'leaky_relu'),
    Layer(100, 'leaky_relu'),
    Layer(50, 'leaky_relu'),
    Layer(10, 'softmax')
]
nn.fit(X_train, y_train, layers)
Note that this code does work when applied to a binary classification task with plain sigmoid activation functions.
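For completeness, here is a minimal sketch of how the resulting predictions could be checked against the held-out split (this assumes an argmax comparison over the one-hot targets and is not part of the original script):

# Evaluation sketch: predict() returns thresholded output vectors,
# so compare the argmax of each prediction with the argmax of its one-hot target
preds = nn.predict(X_test)
accuracy = np.mean([np.argmax(p) == np.argmax(t) for p, t in zip(preds, y_test)])
print(f'Test accuracy: {accuracy:.4f}')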