We turn next to the task of finding a weight vector w which minimizes the chosen error function E(w).
Because in general there is no hope of finding an analytical solution to the equation ∇E(w) = 0, we resort to
iterative numerical procedures.
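The simplest such procedure is gradient descent: starting from an initial guess w^(0), the weight vector is updated by taking a small step in the direction of the negative gradient,

    w^(τ+1) = w^(τ) − η ∇E(w^(τ)),

where η > 0 is the learning rate and τ labels the iteration step. Evaluating ∇E over the whole training set before each step gives batch gradient descent.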
On-line gradient descent, also known as sequential gradient descent or stochastic gradient descent, makes
an update to the weight vector based on one data point at a time.
One advantage of on-line methods compared to batch methods is that they handle redundancy in the data
much more efficiently. They also offer the possibility of escaping from local minima,
since a stationary point of the error function for the whole data set will generally not be a stationary point
for each data point individually.
Finally, on-line learning requires far less storage than batch learning, since it never needs to hold the whole data set in memory at once.
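For error functions that decompose as a sum over data points, E(w) = Σ_n E_n(w), the on-line update is w^(τ+1) = w^(τ) − η ∇E_n(w^(τ)), applied to one point n at a time, cycling through the data either sequentially or by selecting points at random. The C++ program below puts these schemes side by side on one-dimensional linear regression: it fits a slope and intercept with the closed-form least-squares solution, with batch gradient descent, with stochastic gradient descent, and with locally weighted linear regression, reading its inputs and targets from the files ex2x.dat and ex2y.dat.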
#include <iostream>
#include <fstream>
#include <vector>
#include <utility>
#include <cstddef>
#include <cmath>
/* Gaussian kernel weight: training points near the query weigh close to 1,
   distant ones close to 0 (the bandwidth is fixed at 1 here). */
double dis(double train, double query) {
    return std::exp(-0.5 * std::pow(train - query, 2));
}
/* Ordinary least squares: closed-form solution of the normal equations. */
template <typename PairIterator>
bool GetLinearFit(PairIterator begin_it, PairIterator end_it, double& slope, double& y_intercept) {
    if (begin_it == end_it) {
        return false;
    }
    size_t n = end_it - begin_it;
    double sum_x2 = 0.0, sum_y = 0.0, sum_x = 0.0, sum_xy = 0.0;
    for (PairIterator it = begin_it; it != end_it; ++it) {
        sum_x2 += (it->first) * (it->first);
        sum_y  += it->second;
        sum_x  += it->first;
        sum_xy += (it->first) * (it->second);
    }
    slope = (n * sum_xy - sum_x * sum_y) / (n * sum_x2 - sum_x * sum_x);
    y_intercept = (sum_x2 * sum_y - sum_x * sum_xy) / (n * sum_x2 - sum_x * sum_x);
    return true;
}
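/* The two assignments above are the closed-form solution of the normal
   equations for simple linear regression:
       slope       = (n*Sxy - Sx*Sy) / (n*Sxx - Sx*Sx)
       y_intercept = (Sxx*Sy - Sx*Sxy) / (n*Sxx - Sx*Sx)
   A closed form exists here because the model is linear in its parameters;
   the iterative routines below should converge to essentially the same fit. */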
/* Locally weighted linear regression (LWR): fits a line by gradient descent
   on a cost in which each training point is weighted by its kernel distance
   to a query point. */
template <typename PairIterator>
bool LWR(PairIterator begin_it, PairIterator end_it, double& slope, double& y_intercept) {
    if (begin_it == end_it) {
        return false;
    }
    /* The query point at which the local model is fitted. In a full LWR
       implementation this would be a parameter, since the model is refitted
       for every query; it is fixed here for demonstration. */
    double query = 5.5;
    size_t n = end_it - begin_it;
    /* Initial weighted cost J = (1/2n) * sum_i w_i*(theta0 + theta1*x_i - y_i)^2. */
    double J = 0.0;
    for (PairIterator it = begin_it; it != end_it; ++it) {
        double r = y_intercept + slope * (it->first) - it->second;
        J += r * r * dis(it->first, query);
    }
    J = J * 0.5 / n;
    while (true) {
        /* Weighted gradient with respect to the intercept and the slope. */
        double temp0 = 0, temp1 = 0;
        for (PairIterator it = begin_it; it != end_it; ++it) {
            double r = y_intercept + slope * (it->first) - it->second;
            temp0 += r * dis(it->first, query);
            temp1 += r * (it->first) * dis(it->first, query);
        }
        temp0 /= n;
        temp1 /= n;
        /* 0.03 is the learning rate alpha. */
        y_intercept -= 0.03 * temp0;
        slope -= 0.03 * temp1;
        /* Recompute the weighted cost; stop when it no longer changes. */
        double MSE = 0.0;
        for (PairIterator it = begin_it; it != end_it; ++it) {
            double r = y_intercept + slope * (it->first) - it->second;
            MSE += r * r * dis(it->first, query);
        }
        MSE = 0.5 * MSE / n;
        if (std::abs(J - MSE) < 1e-8)
            break;
        J = MSE;
    }
    return true;
}
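/* Because the kernel weights depend on the query point, LWR is effectively
   non-parametric: the slope and intercept it returns describe the fit in the
   neighbourhood of x = 5.5 only, not a single line for the whole data set. */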
/* Batch Gradient Descent (BGD): every update uses the gradient computed
   over the entire data set. */
template <typename PairIterator>
bool BatchGradientDescent(PairIterator begin_it, PairIterator end_it, double& slope, double& y_intercept) {
    if (begin_it == end_it) {
        return false;
    }
    size_t n = end_it - begin_it;
    /* Initial cost J = (1/2n) * sum_i (theta0 + theta1*x_i - y_i)^2. */
    double J = 0.0;
    for (PairIterator it = begin_it; it != end_it; ++it) {
        double r = y_intercept + slope * (it->first) - it->second;
        J += r * r;
    }
    J = J * 0.5 / n;
    while (true) {
        /* Full-batch gradient with respect to the intercept and the slope. */
        double temp0 = 0, temp1 = 0;
        for (PairIterator it = begin_it; it != end_it; ++it) {
            double r = y_intercept + slope * (it->first) - it->second;
            temp0 += r;
            temp1 += r * (it->first);
        }
        temp0 /= n;
        temp1 /= n;
        /* 0.03 is the learning rate alpha. */
        y_intercept -= 0.03 * temp0;
        slope -= 0.03 * temp1;
        /* Recompute the cost; stop when it no longer changes. */
        double MSE = 0.0;
        for (PairIterator it = begin_it; it != end_it; ++it) {
            double r = y_intercept + slope * (it->first) - it->second;
            MSE += r * r;
        }
        MSE = 0.5 * MSE / n;
        if (std::abs(J - MSE) < 1e-8)
            break;
        J = MSE;
    }
    return true;
}
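/* Every batch update above costs a full pass over the data, so near-duplicate
   points make each step proportionally more expensive; this is the redundancy
   that on-line methods handle more efficiently. */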
/* Stochastic Gradient Descent (SGD): the parameters are updated after every
   single data point rather than after a full pass. */
template <typename PairIterator>
bool StochasticGradientDescent(PairIterator begin_it, PairIterator end_it, double& slope, double& y_intercept) {
    if (begin_it == end_it) {
        return false;
    }
    size_t n = end_it - begin_it;
    /* Initial cost J = (1/2n) * sum_i (theta0 + theta1*x_i - y_i)^2. */
    double J = 0.0;
    for (PairIterator it = begin_it; it != end_it; ++it) {
        double r = y_intercept + slope * (it->first) - it->second;
        J += r * r;
    }
    J = 0.5 * J / n;
    bool converged = false;
    while (!converged) {
        /* One epoch: sweep the data, updating after each point. */
        for (PairIterator it = begin_it; it != end_it; ++it) {
            double r = y_intercept + slope * (it->first) - it->second;
            /* 0.03 is the learning rate alpha. */
            y_intercept -= 0.03 * r;
            slope -= 0.03 * r * (it->first);
            /* Recompute the full cost and stop once it no longer changes.
               Doing this after every single update is expensive; it is kept
               here to mirror the batch version's stopping rule. */
            double MSE = 0.0;
            for (PairIterator jt = begin_it; jt != end_it; ++jt) {
                double r2 = y_intercept + slope * (jt->first) - jt->second;
                MSE += r2 * r2;
            }
            MSE = 0.5 * MSE / n;
            if (std::abs(J - MSE) < 1e-8) {
                converged = true;
                break;
            }
            J = MSE;
        }
    }
    return true;
}
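/* Each SGD step follows the gradient of a single point's error, so the
   trajectory is noisier than the batch version's, but each update is far
   cheaper, and the per-point noise is what can carry the iterate past a
   stationary point of the full-data error. */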
int main() {
    std::ifstream in;
    in.open("ex2x.dat");
    if (!in) {
        std::cerr << "open file ex2x.dat failed!" << std::endl;
        return 1;
    }
    std::vector<double> datax, datay;
    double temp;
    while (in >> temp) {
        datax.push_back(temp);
    }
    in.close();
    in.open("ex2y.dat");
    if (!in) {
        std::cerr << "open file ex2y.dat failed!" << std::endl;
        return 1;
    }
    while (in >> temp) {
        datay.push_back(temp);
    }
    in.close();
    /* Pair up each input with its target. */
    std::vector<std::pair<double, double> > data;
    for (std::vector<double>::const_iterator iterx = datax.begin(), itery = datay.begin();
         iterx != datax.end() && itery != datay.end(); ++iterx, ++itery) {
        data.push_back(std::pair<double, double>(*iterx, *itery));
    }
    double slope = 0.0;
    double y_intercept = 0.0;
    GetLinearFit(data.begin(), data.end(), slope, y_intercept);
    std::cout << "Least squares result:" << std::endl;
    std::cout << "slope: " << slope << std::endl;
    std::cout << "y_intercept: " << y_intercept << std::endl;
    slope = 1.0, y_intercept = 1.0;
    BatchGradientDescent(data.begin(), data.end(), slope, y_intercept);
    std::cout << "Batch gradient descent result:" << std::endl;
    std::cout << "slope: " << slope << std::endl;
    std::cout << "y_intercept: " << y_intercept << std::endl;
    slope = 1.0, y_intercept = 1.0;
    StochasticGradientDescent(data.begin(), data.end(), slope, y_intercept);
    std::cout << "Stochastic gradient descent result:" << std::endl;
    std::cout << "slope: " << slope << std::endl;
    std::cout << "y_intercept: " << y_intercept << std::endl;
    slope = 1.0, y_intercept = 1.0;
    LWR(data.begin(), data.end(), slope, y_intercept);
    std::cout << "Locally weighted linear regression result:" << std::endl;
    std::cout << "slope: " << slope << std::endl;
    std::cout << "y_intercept: " << y_intercept << std::endl;
    return 0;
}
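Since the squared-error cost of a linear model is convex, batch and stochastic gradient descent should converge to approximately the same slope and intercept as the closed-form least-squares fit, while LWR will generally differ because its answer describes the local fit around the hard-coded query point x = 5.5. Note that the fixed learning rate of 0.03 assumes reasonably scaled inputs; on poorly scaled data the loops above can oscillate or diverge instead of meeting the 1e-8 stopping tolerance.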