写了三个 Logistic Regression 的实现,发了好几篇博文,我都有点儿写上瘾了。
// the sample format: classid feature1_value feature2_value... bool LogisticRegression::TrainSGDOnSampleFile ( const char * sFileName, int iClassNum, int iFeatureNum, // about the samples double dLearningRate = 0.05, // about the learning int iMaxLoop = 1, double dMinImproveRatio = 0.01 // about the stop criteria ) { ...... for (int iLoop = 0; iLoop < iMaxLoop; iLoop++) { ...... while (getline (in, sLine)) { Sample theSample; if (ReadSampleFrmLine (sLine, theSample)) { ...... } } ...... if (dTmpRatio < dMinImproveRatio) break; else { ...... //reset the current reading position of file in.clear(); in.seekg (0, ios::beg); } } return true; }
// the sample format: classid feature1_value feature2_value... bool LogisticRegression::TrainSGDOnSampleFileEx2 ( const char * sFileName, int iClassNum, int iFeatureNum, // about the samples double dLearningRate = 0.05, // about the learning int iMaxLoop = 1, double dMinImproveRatio = 0.01 // about the stop criteria ) { ifstream in (sFileName); if (!in) { cerr << "Can not open the file of " << sFileName << endl; return false; } if (!InitThetaMatrix (iClassNum, iFeatureNum)) return false; vector<Sample> SampleVec; if (!LoadAllSamples (sFileName, SampleVec)) return false; double dCost = 0.0; double dPreCost = 100.0; for (int iLoop = 0; iLoop < iMaxLoop; iLoop++) { srand((unsigned)time(NULL)); int iErrNum = 0; int iSampleNum = (int)SampleVec.size(); for (int i=0; i<iSampleNum; i++) { double dRandomFloat = (double)rand() / RAND_MAX; int iSampleIndex = (int)(dRandomFloat * iSampleNum); vector<double> ClassProbVec; int iPredClassIndex = CalcFuncOutByFeaVecForAllClass (SampleVec[iSampleIndex].FeaValNodeVec, ClassProbVec); if (iPredClassIndex != SampleVec[iSampleIndex].iClass) iErrNum++; dCost += UpdateThetaMatrix (SampleVec[iSampleIndex], ClassProbVec, dLearningRate); } dCost /= iSampleNum; double dTmpRatio = (dPreCost - dCost) / dPreCost; double dTmpErrRate = (double)iErrNum / iSampleNum; // show info on screen cout << "In loop " << iLoop << ": current cost (" << dCost << ") previous cost (" << dPreCost << ") ratio (" << dTmpRatio << ") "<< endl; cout << "And Error rate : " << dTmpErrRate << endl; /*if (dTmpRatio < dMinImproveRatio) break; else*/ if (dCost < 0.001) break; { dPreCost = dCost; dCost = 0.0; } } return true; }
vector<Sample> SampleVec; if (!LoadAllSamples (sFileName, SampleVec)) return false;
随机挑选样本的代码片段:
// Draw a sample index uniformly from [0, iSampleNum).
// rand() may return RAND_MAX itself, so divide by RAND_MAX + 1.0: the ratio then stays strictly
// below 1 and the index can never equal iSampleNum (which would be out of range).
double dRandomFloat = rand() / (RAND_MAX + 1.0);
int iSampleIndex = static_cast<int>(dRandomFloat * iSampleNum);