我正在尝试编写一个程序,该程序将为我提供c ++中内核估计器的交叉验证带宽(大约有20,000个数据点,因此Matlab太慢了)。我这样做是通过对遗忘之一估计量的目标的导数使用割线算法来实现的。我的问题是此函数将数据作为参数,该数据是从csv文件中提取到主函数中的。
割线算法的一个参数是一个函数,该函数需要一个double并返回double,但是我为目标的导数编写的函数必须包含大量其他废话,在数学上,我们会考虑使用参数(诸如数据,内核功能的选择等)。
我需要能够编写一个函数,该函数使用定义目标的函数,放入主函数从文件中提取的数据,并将双精度变量作为其唯一输入。有办法吗?
double cvobjective(double data[], int n, double (*k)(double), double (*kd)(double), double h)
{
double cvob = 0;
double xi;
double xj;
for(int i = 0; i < n; ++i)
{
xi = data[i];
double sumki = 0;
double sumkdi = 0;
for(int j = 0; j < n; ++j) //find sum of k((xj-xi)/h) and k'((xj-xi)/h)*(xj-xi)
{
xj = data[j];
sumki = sumki + k((xj-xi)/h);
sumkdi = sumkdi + kd((xj-xi)/h)*(xj-xi);
}
sumki = sumki-k(0);//gets rid of the terms where i=j
sumkdi = sumkdi-kd(0);
cvob = cvob - reciprocal(sumki)*(reciprocal(h)*sumki+reciprocal(pow(h,2))*sumkdi);
}
return cvob;
}
double secantmethod(double (*obj)(double), double init, double tolerance, int giveup)
{
double x = init;
double old = init+1;
double newp;
double fold = obj(old);
double fnew;
for(int i=0;i<giveup;++i)
{
fnew = obj(x);
if(abs(fnew-fold)<tolerance)
{
cout << "Objective values get too close after " << i << " iterations." << endl;
break;
}
newp = newp - (x-old)*reciprocal(fnew-fold)*fnew;
old = x;
x = newp;
cout << "Estimate is currently: " << x << endl;
fold = fnew;
if(abs(fnew)<tolerance)
break;
if(i == giveup - 1)
cout << "Secant algorithm did not converge." << endl;
}
return newp;
}
int main()
{
const int N = 19107;
double incomes[N];
std::ifstream ifile("incomes.csv", std::ios::in);
std::vector<double> scores;
//check to see that the file was opened correctly:
if (!ifile.is_open()) {
std::cerr << "There was a problem opening the input file.\n";
exit(1);//exit or do additional error checking
}
double num = 0.0;
//keep storing values from the text file so long as data exists:
while (ifile >> num) {
scores.push_back(num);
}
//verify that the scores were stored correctly:
for (int i = 0; i < scores.size(); ++i) {
incomes[i]=scores[i];
}
double sv = silverman(incomes,N);
double cvbandwidth = secantmethod(cvobj,sv,0.000001,100);
cout << setprecision(10) << cvbandwidth << endl;
return 0;
}
显然,我省略了一些不重要的外围功能代码。我考虑过是否可以更改secantmethod算法,以便它希望采用一个函数,该函数将cvobjective的所有内容都作为输入,但是我现在还不清楚我该怎么做。 。
理想情况下,我可以在main内部创建一个函数,使incomes数组在该函数的范围内,但是我不能正确地理解lambda,或者它们并不特别适合此目的。失败的话,如果有一种以上述方式更改secantmethod的方法,那也将起作用。
编辑:在上面,cvobj未定义,当前用作占位符。我希望它像是
double cvobj(double h)
return cvobjective(incomes,N,normpdf,normpdfdiff,h);
但是很显然,当我尝试这样做时,它抱怨收入和N不在函数范围内。
所以我认为我找到了一种解决方法,该方法是定义类
class kernelwdata
{
public:
double observations[N];
double cvobj(double h)
{
return cvobjective(observations,N,normpdf,normpdfdiff,h);
}
};
并更改secantmethod,以使其将kernelwdata作为输入。现在,前面显示的代码看起来像
double cvobjective(double data[], int n, double (*k)(double), double (*kd)(double), double h)
{
double cvob = 0;
double xi;
double xj;
for(int i = 0; i < n; ++i)
{
xi = data[i];
double sumki = 0;
double sumkdi = 0;
for(int j = 0; j < n; ++j) //find sum of k((xj-xi)/h) and k'((xj-xi)/h)*(xj-xi)
{
xj = data[j];
if(j==i)
xj=xi+1;
sumki = sumki + k((xj-xi)/h);
sumkdi = sumkdi + kd((xj-xi)/h)*(xj-xi);
}
sumki = sumki-k(1/h);//gets rid of the terms where i=j
sumkdi = sumkdi-kd(1/h);
cvob = cvob - reciprocal(sumki)*(reciprocal(h)*sumki+reciprocal(pow(h,2))*sumkdi);
}
return cvob;
}
class kernelwdata
{
public:
double observations[N];
double cvobj(double h)
{
return cvobjective(observations,N,normpdf,normpdfdiff,h);
}
};
double secantmethod(kernelwdata obj, double init, double tolerance, int giveup)
{
double x = init;
double old = init+1;
double newp;
double fold = obj.cvobj(old);
double fnew;
for(int i=0;i<giveup;++i)
{
fnew = obj.cvobj(x);
cout << "fold is " << fold << " and fnew is " << fnew << endl;
if(abs(fnew-fold)<tolerance)
{
cout << "Objective values get too close after " << i << " iterations." << endl;
break;
}
newp = newp - (x-old)*reciprocal(fnew-fold)*fnew;
old = x;
x = newp;
cout << "Estimate is currently: " << x << endl;
fold = fnew;
if(abs(fnew)<tolerance)
break;
if(i == giveup - 1)
cout << "Secant algorithm did not converge." << endl;
}
return newp;
}
double silverman(double data[], int n)
{
double mean = 0;
for(int counter = 0; counter < n; ++counter)
mean = mean+data[counter]/n;
double sigmahat = 0;
for(int counter = 0; counter < n; ++counter)
{
sigmahat = sigmahat + pow(data[counter]-mean,2);
}
sigmahat = sigmahat/(n-1);
sigmahat = sqrt(sigmahat);
double m = (double) n;
return sigmahat*pow(m,-0.2);
}
int main()
{
kernelwdata incomes;
std::ifstream ifile("incomestest.csv", std::ios::in);
std::vector<double> scores;
//check to see that the file was opened correctly:
if (!ifile.is_open()) {
std::cerr << "There was a problem opening the input file.\n";
exit(1);//exit or do additional error checking
}
double num = 0.0;
//keep storing values from the text file so long as data exists:
while (ifile >> num) {
scores.push_back(num);
}
//verify that the scores were stored correctly:
for (int i = 0; i < scores.size(); ++i) {
incomes.observations[i]=scores[i];
}
//double sv = silverman(incomes.observations,N);
double cvbandwidth = secantmethod(incomes,10,0.000001,100);
cout << "The cross-validation bandwidth is: " << setprecision(10) << cvbandwidth << endl;
return 0;
}
而且我现在正在与输出争论,它给出的是我(肯定是某个地方的数学问题),而不是还不是我可以编译的东西。