I tried to implement an MLP neural network for digit recognition using Eigen3, but if I compile it normally, at some point during execution all of the parameters (weights, biases, activations) become NaN. If instead I run it under the VS Code debugger, it works.
This is the network:
#include "..\Headers\Network.h"
#include <cmath>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
#include <algorithm>
#include <fstream>
using std::vector;
using std::string;
using Eigen::VectorXd;
using Eigen::MatrixXd;
double sigmoide (double x);
double sigmoide_derivative(double x);
VectorXd sigmoide_derivative(VectorXd vec);
void load_val (string pi, VectorXd& pixels);
std::ofstream _log ("log.txt");
template <typename T>
void print(T& mat)
{
for (int r = 0; r < mat.rows(); r++)
{
for (int c = 0; c < mat.cols(); c++)
{
std::cout << mat(r, c) << " ";
}
std::cout << "\n";
}
}
Network::Network(vector<string> _data, int _l_rate, vector<int> dim) : data{_data}, l_rate{_l_rate}
{
layers = dim.size();
// One weight matrix and bias vector per pair of consecutive layers;
// MatrixXd::Random / VectorXd::Random fill them with uniform values in [-1, 1]
for (int i = 0; i < layers - 1; i++)
{
MatrixXd m = MatrixXd::Random(dim[i + 1], dim[i]);
weights.push_back(m);
VectorXd b = VectorXd::Random(dim[i + 1]);
biases.push_back(b);
VectorXd _z (dim[i + 1]);
z.push_back(_z);
VectorXd n (dim[i]);
neurons.push_back(n);
}
VectorXd n_f (dim[layers - 1]);
neurons.push_back(n_f);
}
void Network::learn(int epoch, int mini_batch)
{
for(int e = 0; e < epoch; e++)
{
std::cout << "Epoch: " << e + 1 << "\n\n";
double e_cost = 0;
shuffle(begin(data), end(data), rng); // new sample order each epoch
for(unsigned long long int n = 0; n < data.size();) // n is advanced inside SGD
{
e_cost += SGD(mini_batch, n);
}
std::cout << "Epoch cost: " << e_cost << "\n"; // e_cost was accumulated but never reported
}
}
double Network::SGD (int mini_batch, unsigned long long int& n_data)
{
vector<VectorXd> p_d_biases = vector<VectorXd>();
vector<MatrixXd> p_d_weights = vector<MatrixXd>();
double b_cost = 0;
for(int m = 0; m < mini_batch && n_data != data.size(); m++, n_data++)
{
feed_forward(data.at(n_data));
SGD_b(p_d_biases, data.at(n_data));
SGD_w(p_d_weights, p_d_biases);
b_cost += cost(data.at(n_data));
}
step(p_d_weights, p_d_biases, mini_batch);
return b_cost / mini_batch;
}
double Network::cost(string sample)
{
VectorXd e_values;
exp_values(sample, e_values);
double s_cost = 0;
// Squared-error cost against the one-hot expected output
for(int i = 0; i < neurons[layers - 1].size(); i++)
{
s_cost += std::pow(neurons.at(layers - 1)(i) - e_values(i), 2);
}
return s_cost;
}
void Network::step(const vector<MatrixXd>& p_d_weights, const vector<VectorXd>& p_d_biases, int mini_batch)
{
// Number of samples actually accumulated for this mini-batch
int samples = static_cast<int>(p_d_biases.size()) / (layers - 1);
for(int l = layers - 2, n = 0; l >= 0; l--, n++)
{
// The accumulators must start at zero: a plain VectorXd/MatrixXd is uninitialized
VectorXd b_tmp = VectorXd::Zero(biases.at(l).rows());
MatrixXd w_tmp = MatrixXd::Zero(weights.at(l).rows(), weights.at(l).cols());
for(int i = 0; i < samples; i++)
{
b_tmp += p_d_biases.at((i * (layers - 1)) + n);
w_tmp += p_d_weights.at((i * (layers - 1)) + n);
}
biases.at(l) -= l_rate * (b_tmp / mini_batch);
weights.at(l) -= l_rate * (w_tmp / mini_batch);
}
}
void Network::SGD_w (vector<MatrixXd>& p_d_weights, const vector<VectorXd>& p_d_biases)
{
for(int l = layers - 2; l >= 0; l--)
{
p_d_weights.push_back(p_d_biases.at(p_d_biases.size() - (l + 1)) * neurons.at(l).transpose());
}
}
void Network::SGD_b (vector<VectorXd>& p_d_biases, string sample)
{
VectorXd e_values;
exp_values(sample, e_values);
// Backpropagate from the output layer towards the input:
// output delta = 2*(a - y) .* sigmoide'(z); hidden delta = (W^T * next delta) .* sigmoide'(z)
for(int l = layers - 1; l > 0; l--)
{
if(l == (layers - 1))
{
p_d_biases.push_back((2*(neurons.at(l) - e_values)).cwiseProduct(sigmoide_derivative(z.at(l-1))));
}
else
{
VectorXd b =(weights.at(l).transpose() * p_d_biases.at(p_d_biases.size() - 1)).cwiseProduct(sigmoide_derivative(z.at(l - 1)));
p_d_biases.push_back(b);
}
}
}
void Network::feed_forward(string sample)
{
load_val(sample, neurons[0]); // input layer = normalized pixel values
for(int l = 0; l < layers - 1; l++)
{
z.at(l) = weights.at(l) * neurons.at(l) + biases.at(l); // weighted input, then element-wise sigmoid below
for (int i = 0; i < biases.at(l).size(); i++)
{
neurons.at(l + 1)(i) = sigmoide(z.at(l)(i));
}
}
}
void Network::exp_values (string sample, VectorXd& e_values)
{
e_values.resize(neurons[layers - 1].size());
// The first character of the sample line is the digit label
short digit = std::stoi(string(sample.begin(), sample.begin() + 1));
for(int i = 0; i < e_values.size(); i++)
{
e_values(i) = (i == digit) ? 1 : 0; // one-hot encoding of the label
}
}
void load_val (string pi, VectorXd& pixels)
{
// Parse a CSV line "label,p0,p1,...,p783" and normalize each pixel to [0, 1]
int i = 0;
for(auto s = pi.begin() + 3, p = pi.begin() + 1; s != pi.end(); s++)
{
if(*s == ',')
{
double t = std::stoi(std::string(p + 1, s)) / 255.;
pixels(i) = t;
p = s;
i++;
}
if(s == (pi.end() - 1))
{
// The last value has no trailing comma; parse from after the last comma,
// not std::string(s, pi.end()), which would read only its final digit
double t = std::stoi(std::string(p + 1, pi.end())) / 255.;
pixels(i) = t;
}
}
}
VectorXd sigmoide_derivative(VectorXd vec)
{
VectorXd result (vec.size());
for(int r = 0; r < vec.rows(); r++)
{
result(r) = sigmoide_derivative(vec(r));
}
return result;
}
double sigmoide_derivative(double x)
{
return std::exp(x) / std::pow(1 + std::exp(x), 2);
}
double sigmoide (double x)
{
return 1. / (1 + (1. / std::exp(x)));
}
And this is the main:
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "..\Headers\Network.h"
using namespace std;
int main()
{
ifstream f_data ("..\\csv_files\\mnist_train.csv");
vector<string> data;
if(f_data.good())
{
string tmp;
while(f_data >> tmp) // stops cleanly at end of file; !eof() would push one extra empty line
{
data.push_back(tmp);
}
vector<int> dim {784, 16, 10};
Network n (data, 3, dim);
n.learn(20, 10);
ifstream t_data ("..\\csv_files\\mnist_test.csv");
string s;
t_data >> s;
cout << string(s.begin(), s.begin() + 1) << endl;
n.feed_forward(s);
print(n.getNeurons(2));
}
return 0;
}
I tried disabling compiler optimizations and I checked that everything is initialized, but beyond that I don't know where to look. I am compiling with GCC from MinGW.
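To narrow things down, I'm thinking of adding a check like this after each update (a sketch using Eigen's allFinite(); check_finite is a hypothetical helper, not part of my code above):

#include <cassert>
#include <vector>
#include <Eigen/Dense>
// Hypothetical helper: stop as soon as any weight or bias stops being finite
void check_finite(const std::vector<Eigen::MatrixXd>& weights,
                  const std::vector<Eigen::VectorXd>& biases)
{
    for (const auto& w : weights) assert(w.allFinite());
    for (const auto& b : biases) assert(b.allFinite());
}

Calling it after each step() would make the first bad update fail immediately instead of silently propagating NaN through every layer.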
I found the solution: the sigmoide and sigmoide_derivative functions call exp(), which for large values of x overflows; in sigmoide_derivative both numerator and denominator become inf, and inf / inf evaluates to NaN. I added a check that returns 0 when the result is NaN. Probably in debug mode this case is handled automatically.
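A minimal sketch of that guard, together with the algebraically equivalent form sigmoide'(x) = sigmoide(x) * (1 - sigmoide(x)), which avoids the inf / inf intermediate entirely (sigmoide_derivative_stable is just an illustrative name for the alternative, not what I originally had):

#include <cmath>

// Guarded version: for large x, std::exp(x) overflows to inf and
// inf / inf yields NaN, so fall back to 0 (the derivative's true limit)
double sigmoide_derivative(double x)
{
    double d = std::exp(x) / std::pow(1 + std::exp(x), 2);
    return std::isnan(d) ? 0. : d;
}

// Alternative sketch: the logistic function saturates to 0 or 1 instead of
// overflowing, so s * (1 - s) computes the same value with no inf involved
double sigmoide_derivative_stable(double x)
{
    double s = 1. / (1. + std::exp(-x)); // standard logistic form
    return s * (1. - s);
}

With 784 inputs and weights drawn from [-1, 1], the weighted input z can easily reach magnitudes where exp() overflows a double (around x > 709), which is why the NaNs appear early in training.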