I wrote a neural network in C++ using the Eigen library, and I am trying to verify its accuracy by training it on the MNIST handwritten digit data. I train the network, but I only get ~10% accuracy when validating it against the test data both before and after training. If I am not mistaken, it should be around 90%. I am not sure why the training is not working, and I would like to find out. I have stepped through the flow in a debugger and also double-checked that I am handling the input data correctly. Here is my relevant implementation code; the rest is available in this repository:
Network.cpp
Network::Network(const std::vector<int> layer_data)
    : weights(0), biases(0)
{
    initialize_layers(layer_data);
    generate_weights();
    generate_biases();
}
Network::Network(const std::vector<int> layer_data,
    std::vector<Eigen::MatrixXd> network_weights, std::vector<Eigen::MatrixXd> network_biases)
{
    initialize_layers(layer_data);
    weights = network_weights;
    biases = network_biases;
}
void Network::initialize_layers(std::vector<int> layer_data)
{
    layers = layer_data;
    non_input_layers = std::vector<int>(layers.begin() + 1, layers.end());
}
void Network::generate_biases()
{
    biases.reserve(non_input_layers.size());
    for (size_t i = 0; i < non_input_layers.size(); i++)
    {
        biases.push_back(Eigen::MatrixXd::Random(non_input_layers[i], 1));
    }
}
void Network::generate_weights()
{
    weights.reserve(layers.size() - 1);
    for (auto it = layers.begin(); it != layers.end() - 1; it++)
    {
        weights.push_back(Eigen::MatrixXd::Random(*std::next(it), *it));
    }
}
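// Propagate input through every layer. Returns a tuple of (final output,
// pre-activation z vectors per non-input layer, activations per layer,
// where activations[0] is the input itself).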
std::tuple<Eigen::MatrixXd, std::vector<Eigen::MatrixXd>, std::vector<Eigen::MatrixXd>> Network::feed_forward(Eigen::VectorXd input)
{
    std::vector<Eigen::MatrixXd> net_inputs;
    std::vector<Eigen::MatrixXd> activations;
    net_inputs.reserve(non_input_layers.size());
    activations.reserve(non_input_layers.size() + 1);
    activations.push_back(input);
    for (size_t i = 0; i < non_input_layers.size(); i++)
    {
        input = network_calc::multiply_matrices(weights[i], input) + biases[i];
        net_inputs.push_back(input);
        input = input.unaryExpr([] (double x) {
            return 1.0 / (1.0 + exp(-x));
        });
        activations.push_back(input);
    }
    return std::make_tuple(input, net_inputs, activations);
}
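// One forward/backward pass for a single example. Returns a pair of
// (bias gradients, weight gradients), one matrix per non-input layer.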
std::pair<std::vector<Eigen::MatrixXd>, std::vector<Eigen::MatrixXd>> Network::backpropagate(Eigen::MatrixXd input, Eigen::MatrixXd expected_output)
{
    auto feed_forward_results = feed_forward(input);
    Eigen::MatrixXd calculated_output = std::get<0>(feed_forward_results);
    std::vector<Eigen::MatrixXd> net_inputs = std::get<1>(feed_forward_results);
    std::vector<Eigen::MatrixXd> activations = std::get<2>(feed_forward_results);
    std::vector<Eigen::MatrixXd> cloned_weights(weights);
    std::vector<Eigen::MatrixXd> cloned_biases(biases);
    // Output-layer delta: cost derivative times sigmoid derivative.
    for (int i = 0; i < layers.back(); i++)
    {
        cloned_biases.back()(i, 0) = network_calc::result_difference(expected_output(i, 0), calculated_output(i, 0)) *
            network_calc::sigmoid_derivative(net_inputs.back()(i));
    }
    Eigen::MatrixXd delta = cloned_biases.back();
    cloned_weights.back() = network_calc::multiply_matrices(delta, activations[activations.size() - 2].transpose());
    // Propagate the delta backwards through the hidden layers.
    for (size_t i = non_input_layers.size() - 1; i > 0; i--)
    {
        Eigen::MatrixXd partial_calc = net_inputs[i - 1].unaryExpr([] (double x) {
            return network_calc::sigmoid_derivative(x);
        });
        delta = network_calc::multiply_matrices(weights[i].transpose(), delta).cwiseProduct(partial_calc);
        cloned_biases[i - 1] = delta;
        cloned_weights[i - 1] = network_calc::multiply_matrices(delta, activations[i - 1].transpose());
    }
    return std::make_pair(cloned_biases, cloned_weights);
}
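// Train with mini-batch gradient descent: each epoch shuffles the training
// data, takes the first batch_size examples as the mini-batch, and applies
// one update.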
void Network::mini_batch_gradient_descent(double alpha, int epochs, int batch_size, std::vector<xy_data> training_data)
{
    auto rng = std::default_random_engine{};
    std::vector<xy_data> mini_batch;
    mini_batch.reserve(batch_size);
    if (training_data.size() < static_cast<size_t>(batch_size))
    {
        std::cout << "Warning: training data size is less than the batch size. Batch size will be changed to match training data size.\n";
        batch_size = training_data.size();
    }
    for (int i = 0; i < epochs; i++)
    {
        mini_batch.clear(); // start each epoch with a fresh batch
        std::shuffle(std::begin(training_data), std::end(training_data), rng);
        for (int j = 0; j < batch_size; j++)
        {
            mini_batch.push_back(training_data[j]);
        }
        update_weights_biases(mini_batch, alpha);
        // Perform prediction on the last example just to check
        Eigen::MatrixXd new_predicted_output = std::get<2>(feed_forward(training_data.back().first)).back();
        std::cout << "Epoch " << i << ", "
            << "SSE: " << network_calc::sum_squared_error(new_predicted_output, training_data.back().second) << ".\n";
    }
}
std::vector<Eigen::MatrixXd> Network::get_weights()
{
    return weights;
}
std::vector<Eigen::MatrixXd> Network::get_biases()
{
    return biases;
}
std::vector<int> Network::get_layers()
{
    return layers;
}
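// Sum the per-example gradients from backpropagate over the batch, then take
// one averaged step of size alpha against them.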
void Network::update_weights_biases(std::vector<xy_data> batch, double alpha)
{
    std::vector<Eigen::MatrixXd> delta_weights = network_calc::created_zeroed_layers(&weights);
    std::vector<Eigen::MatrixXd> delta_biases = network_calc::created_zeroed_layers(&biases);
    for (size_t i = 0; i < batch.size(); i++)
    {
        auto backprop_output_pair = backpropagate(batch[i].first, batch[i].second);
        for (size_t j = 0; j < backprop_output_pair.first.size(); j++)
        {
            delta_weights[j] += backprop_output_pair.second[j];
            delta_biases[j] += backprop_output_pair.first[j];
        }
    }
    for (size_t i = 0; i < weights.size(); i++)
    {
        weights[i] -= (alpha / batch.size()) * delta_weights[i];
        biases[i] -= (alpha / batch.size()) * delta_biases[i];
    }
}
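To rule out an error in backpropagate itself, a numerical gradient check along these lines should help. This is a minimal sketch against the class interface above; it assumes network_calc::sum_squared_error is the cost that result_difference differentiates, so a constant-factor or sign mismatch between the two printed values would itself be a useful finding. The helper name gradient_check is mine:
// Compare the analytic gradient from backpropagate with a central-difference
// estimate for one weight per layer.
void gradient_check(Network *network, const xy_data &example)
{
    const double eps = 1e-5;
    auto analytic = network->backpropagate(example.first, example.second);
    std::vector<Eigen::MatrixXd> weights = network->get_weights();
    std::vector<Eigen::MatrixXd> biases = network->get_biases();
    for (size_t l = 0; l < weights.size(); l++)
    {
        std::vector<Eigen::MatrixXd> w_plus(weights), w_minus(weights);
        w_plus[l](0, 0) += eps;
        w_minus[l](0, 0) -= eps;
        Network net_plus(network->get_layers(), w_plus, biases);
        Network net_minus(network->get_layers(), w_minus, biases);
        double cost_plus = network_calc::sum_squared_error(
            std::get<0>(net_plus.feed_forward(example.first)), example.second);
        double cost_minus = network_calc::sum_squared_error(
            std::get<0>(net_minus.feed_forward(example.first)), example.second);
        // Central difference: dC/dw ~ (C(w+eps) - C(w-eps)) / (2*eps)
        double numerical = (cost_plus - cost_minus) / (2.0 * eps);
        std::cout << "Layer " << l << ": analytic " << analytic.second[l](0, 0)
                  << ", numerical " << numerical << "\n";
    }
}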
To load the MNIST data I use this repository; I normalize the pixel intensities to the range 0-1 and then train/validate as follows:
Eigen::MatrixXd convert_mnist_image_to_vector(std::vector<uint8_t> image)
{
    // Normalize pixel range to [0, 1]
    Eigen::VectorXd input(784);
    for (size_t i = 0; i < image.size(); i++)
    {
        input(i) = image[i] / 255.0;
    }
    return input;
}
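// One-hot encode a digit label into a 10x1 column vector.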
Eigen::MatrixXd convert_mnist_label_to_vector(double value)
{
    Eigen::MatrixXd output = Eigen::MatrixXd::Zero(10, 1);
    output(static_cast<int>(value), 0) = 1.0;
    return output;
}
std::vector<xy_data> prepare_mnist_output_data()
{
    // Create output vectors with a value of 1 where the activation happened, and turn the input vectors into eigen vectors
    // Load MNIST data
    mnist::MNIST_dataset<> dataset = mnist::read_dataset<>();
    std::vector<xy_data> mnist_examples;
    mnist_examples.reserve(dataset.training_labels.size());
    std::cout << "Nbr of training images = " << dataset.training_images.size() << std::endl;
    std::cout << "Nbr of training labels = " << dataset.training_labels.size() << std::endl;
    for (size_t i = 0; i < dataset.training_labels.size(); i++)
    {
        Eigen::MatrixXd input = convert_mnist_image_to_vector(dataset.training_images[i]);
        Eigen::MatrixXd output = convert_mnist_label_to_vector(dataset.training_labels[i]);
        mnist_examples.push_back(std::make_pair(input, output));
    }
    std::cout << "MNIST data preparation complete." << "\n";
    return mnist_examples;
}
std::vector<xy_data> prepare_mnist_test_data()
{
    // Same as above, but for the test split.
    mnist::MNIST_dataset<> dataset = mnist::read_dataset<>();
    std::vector<xy_data> mnist_examples;
    mnist_examples.reserve(dataset.test_labels.size());
    std::cout << "Nbr of test images = " << dataset.test_images.size() << std::endl;
    std::cout << "Nbr of test labels = " << dataset.test_labels.size() << std::endl;
    for (size_t i = 0; i < dataset.test_labels.size(); i++)
    {
        Eigen::MatrixXd input = convert_mnist_image_to_vector(dataset.test_images[i]);
        Eigen::MatrixXd output = convert_mnist_label_to_vector(dataset.test_labels[i]);
        mnist_examples.push_back(std::make_pair(input, output));
    }
    std::cout << "MNIST data preparation complete." << "\n";
    return mnist_examples;
}
int argmax(Eigen::MatrixXd mat)
{
    // Helper method intended only for the 10-class MNIST output
    double greatest = mat(0, 0);
    int greatest_idx = 0;
    for (int i = 1; i < 10; i++)
    {
        if (mat(i, 0) > greatest)
        {
            greatest = mat(i, 0);
            greatest_idx = i;
        }
    }
    return greatest_idx;
}
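// Classify every test example and report how many predictions match the
// one-hot labels.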
void validate(Network *network, std::vector<xy_data> test_data)
{
    int matches = 0;
    for (size_t i = 0; i < test_data.size(); i++)
    {
        auto result = std::get<2>(network->feed_forward(test_data[i].first));
        int expected_val = argmax(test_data[i].second);
        int activation_val = argmax(result.back());
        if (activation_val == expected_val)
        {
            matches++;
        }
    }
    std::cout << "Accuracy: " << (100.0 * matches / test_data.size())
        << "% (" << matches << "/" << test_data.size() << ")\n";
}
int main(int argc, char* argv[]) {
    Network mnist_network(std::vector<int> { 784, 100, 10 });
    std::vector<xy_data> dataset = prepare_mnist_output_data();
    std::vector<xy_data> test_dataset = prepare_mnist_test_data();
    validate(&mnist_network, test_dataset); // before training
    mnist_network.mini_batch_gradient_descent(3.0, 30, 10, dataset);
    validate(&mnist_network, test_dataset); // after training
    return 0;
}
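Finally, as a sanity check independent of the hyperparameters, I would expect the network to be able to overfit a tiny training set; if accuracy stays near chance (about 10% for 10 classes) even then, the bug is in the implementation itself. A minimal sketch of what I could add to main after preparing the dataset:
// Overfitting smoke test: train heavily on just 10 examples and validate on
// those same 10. A working implementation should get close to 100% here.
std::vector<xy_data> tiny(dataset.begin(), dataset.begin() + 10);
Network overfit_network(std::vector<int> { 784, 100, 10 });
overfit_network.mini_batch_gradient_descent(3.0, 1000, 10, tiny);
validate(&overfit_network, tiny);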