I am new to machine learning, and I have been tasked with reproducing the code from Binarized Neural Networks: Training Neural Networks with Weights and Activations Constrained to +1 or -1. I would like to implement it in Keras.
I wrote the code shown below, but the resulting accuracy is stuck at 9.87%.
BNN.ipynb
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D,Dropout, Flatten, Dense
from keras.datasets import mnist
from keras.layers import Dense, Activation, BatchNormalization
from keras.constraints import min_max_norm
from keras.optimizers import SGD
from activations import binary_tanh as binary_tanh_op
#Load the data
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
#one-hot encoding
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
# Wrapper around the imported binary hard-tanh activation
def binary_tanh(x):
    return binary_tanh_op(x)
# Step 2: Build the Model
model = Sequential()
model.add(Dense(256, input_dim=784, activation=binary_tanh, kernel_initializer='glorot_uniform', bias_initializer='zeros'))
model.add(BatchNormalization(momentum=0.9, epsilon=0.000001))
model.add(Dense(128, activation=binary_tanh))
model.add(BatchNormalization(momentum=0.9, epsilon=0.000001))
model.add(Dense(64, activation=binary_tanh))
model.add(BatchNormalization(momentum=0.9, epsilon=0.000001))
model.add(Dense(10, activation=binary_tanh))
print(model.summary())
# Print the initial weights of each layer
for layer in model.layers:
    h = layer.get_weights()
    print(h)
opt = SGD(lr=0.000001)
# Step 3: Compile the Model
model.compile(loss='categorical_crossentropy',optimizer=opt,metrics=['accuracy'])
# Step 4: Train the Model
model.fit(X_train,y_train,epochs=50,batch_size=100)
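For reference, the held-out accuracy can be read off after training with a standard evaluate call (a minimal sketch; the batch size simply mirrors the one used for training):
# Evaluate on the MNIST test split
score = model.evaluate(X_test, y_test, batch_size=100)
print('Test loss:', score[0])
print('Test accuracy:', score[1])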
This is the activations.py file:
from __future__ import absolute_import
import keras.backend as K
def round_through(x):
'''Element-wise rounding to the closest integer with full gradient propagation.
A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182)
'''
rounded = K.round(x)
return x + K.stop_gradient(rounded - x)
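# The expression above is a straight-through estimator: in the forward pass the
# stop_gradient term cancels x, so the result equals K.round(x); in the backward
# pass the rounded term contributes no gradient, so the derivative w.r.t. x is 1,
# which keeps the binarized activations below trainable.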
def _hard_sigmoid(x):
'''Hard sigmoid different from the more conventional form (see definition of K.hard_sigmoid).
# Reference:
    - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830)
'''
x = (0.5 * x) + 0.5
return K.clip(x, 0, 1)
def binary_sigmoid(x):
'''Binary hard sigmoid for training binarized neural network.
# Reference:
    - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830)
'''
return round_through(_hard_sigmoid(x))
def binary_tanh(x):
    '''Binary hard tanh for training a binarized neural network.
    The neurons' activation binarization function.
    It behaves like the sign function during forward propagation,
    and like
        hard_tanh(x) = 2 * _hard_sigmoid(x) - 1
    during back propagation, so the gradient is zero when |x| > 1.
    # Reference:
    - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830)
    '''
    return 2 * round_through(_hard_sigmoid(x)) - 1
def binarize(W, H=1):
'''The weights' binarization function,
# Reference:
    - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830)
'''
# [-H, H] -> -H or H
Wb = H * binary_tanh(W / H)
return Wb
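# binarize keeps real-valued weights in [-H, H] during training and maps them to
# -H or +H in the forward pass; because binary_tanh uses round_through, gradients
# still flow back to the underlying real-valued weights.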
def _mean_abs(x, axis=None, keepdims=False):
return K.stop_gradient(K.mean(K.abs(x), axis=axis, keepdims=keepdims))
def xnorize(W, H=1., axis=None, keepdims=False):
    Wb = binarize(W, H)
    Wa = _mean_abs(W, axis, keepdims)
    return Wa, Wb
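As a quick sanity check of the activation itself, binary_tanh maps every input to -1 or +1 in the forward pass; for example (a minimal sketch, assuming a backend where K.eval can evaluate constant tensors):
import keras.backend as K
from activations import binary_tanh

x = K.constant([-2.0, -0.3, 0.4, 3.0])
print(K.eval(binary_tanh(x)))  # expected: [-1. -1.  1.  1.]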
I have tried changing the learning rate from 0.001 down to 0.000001, switching the optimizer from Adam to SGD, and adding dropout, but my model is still stuck at 9.87%.
The training output is here.
Is there a way to raise the accuracy to 90%+?