参考视频教程download:
**深度学习之目标检测常用算法原理+实践精讲 **
Viola-Jones人脸检测算法是第一个实时的人脸检测算法。其影响力就不用多说了,即便是现在,该算法的应用仍然非常广泛。众所周知,Viola-Jones算法分为三个部分,Harr特征和积分图,特征选择的AdaptBoost以及用于训练的Cascade模型。对于Cascade模型,它更多的表示的是一种Strategy,这可以当作一个另外的类别了,这个类别可以看作算法的一种"细节"处理,不同的人对其有不同的看法。Cascade模型主要的目的是降低训练时间,更重要的是使得分类器具有一般性,将重点放在Object和Object邻域的那些样本点。类似于这样的Strategy还有像Kalal在他的博士论文TLD中提出的Sampling Strategy和另外的一种Cascade模型,都能够有效的降低训练时间及使分类器具有一般性,这样的一些Strategy都属于Bootstrapping,也就是训练时用一些策略做出适当的引导,而不是盲目的训练所有的样本点。下面一起看看Harr怎么回事吧。
Harr特征和积分图
1 理论
就我而言Harr特征是属于Regional的特征表达方法(Focus on regional information),对于一个给定的图像窗口,Harr对该窗口的表达方式是用该窗口中的不同局域部分中的灰度差来表示的。如下图所示(a)表示一个窗口I,(b)表示Harr的一个模式,(c)表示在窗口I中计算(b)模式的Harr特征值时,只有白色和黑色部分被考虑了(也就是Focus on regional information)。
Harr特征常用的模式有:
Harr特征值的计算公式为(窗口大小N*N):
对于给定的一个窗口和一个Harr特征模式,如果每一个特征值的计算过程中都对一个模式中的黑色部分和白色部分进行和的计算的话,那么效率就太低了,这里积分图技术就大显身手了。对于给定的一个N*N的窗口I,其积分图计算公式如下:
于是对一个窗口图像中的一个方形内的像素求和计算方式如下:
计算积分图的算法描述如下:
2 实现
2.1基本数据结构
为了方便算法的实现,这里设计了一个ImgData数据结构,该结构将注意力集中在了图像的数据部分,也许是不太完善的,但目前能够满足我的需求,你也是很容易对它做一改动去满足你的需求,比如基于ImgData设计一个三通道的图像数据结构。
登录后复制
#ifndef _IMGDATA_H_
#define_IMGDATA_H_
#include<iostream>
#include<cassert>
/**
*@ ImgDatais a template class that is used for operating only on p_w_picpath data
* and simply focus on one channel right now,but this calss could be used for
* constructing the 3 or more channels operationtemplate class
*
*@parammdata pointer pointing to p_w_picpath data
*@parammrows number of rows for p_w_picpath data
*@parammcols number of cols for p_w_picpath data
*/
template<typenameT> classImgData {
private:
T** mdata;
unsignedintmrows;
unsignedintmcols;
public:
/**
*@brief constructor function with defaultinitialization 0
*
*@param rows number of rows for p_w_picpath data
*@param cols number of cols for p_w_picpath data
*@val initial value for p_w_picpath data, default valueis 0
*/
ImgData(unsignedintrows,unsignedintcols,Tval=0) {
assert(rows > 0 && cols > 0);
mrows= rows;
mcols= cols;
mdata= newT*[mrows];
for(unsignedinti=0; i<mrows; i++) {
mdata[i]= newT[mcols];
}
for(unsignedinti=0; i<mrows; i++) {
for(unsignedintj=0; j<mcols; j++) {
mdata[i][j]= val;
}
}
}
/**
*@brief copy constructor
*/
ImgData(constImgData<T>&data){
mrows= data.rows();
mcols= data.cols();
mdata= newT*[mrows];
for(unsignedinti=0; i<mrows; i++) {
mdata[i]= newT[mcols];
}
for(unsignedinti=0; i<mrows; i++) {
for(unsignedintj=0; j<mcols; j++) {
mdata[i][j]= data[i][j];
}
}
}
/**
*@breif destructor
*/
~ImgData(){
for(unsignedinti=0; i<mrows; i++) {
delete[]mdata[i];
}
delete[]mdata;
}
/**
*@brief overload assignment operator
*/
ImgData<T>&operator=(constImgData<T>& data) {
if(mrows!= data.rows() || mcols != data.rows()){
for(unsignedinti=0; i<mrows; i++) {
delete[]mdata[i];
}
delete[]mdata;
}
mrows= data.rows();
mcols= data.cols();
mdata= newT*[mrows];
for(unsignedinti=0; i<mrows; i++) {
mdata[i]= newT[mcols];
}
for(unsignedinti=0; i<mrows; i++) {
for(unsignedintj=0; j<mcols; j++) {
mdata[i][j]= data[i][j];
}
}
return*this;
}
/**
*@brief set p_w_picpath data's value
*
*@param data a double layer pointer pointingto data going to be set to p_w_picpath data
*/
voidsetData(constT**data=NULL) {
assert(data != NULL);
for(unsignedinti=0; i<mrows; i++) {
for(unsignedintj=0; j<mcols; j++) {
mdata[i][j]= data[i][j];
}
}
}
/**
*@brief overload [] operator for likedata[][]operator use
*/
T* operator[](unsignedintn)const{
assert(n < mrows);
returnmdata[n];
}
/**
*@brief overload -= operator for all p_w_picpathdata minus val individually
*/
voidoperator-=(Tval){
for(unsignedinti=0; i<mrows; i++) {
for(unsignedintj=0; j<mcols; j++) {
mdata[i][j]-= val;
}
}
}
/**
*@brief overload += operator for all p_w_picpathdata plus val individually
*/
voidoperator+=(Tval){
for(unsignedinti=0; i<mrows; i++) {
for(unsignedintj=0; j<mcols; j++) {
mdata[i][j]+= val;
}
}
}
/**
*@brief overload *= operator for all p_w_picpathdata multiple val individually
*/
voidoperator*=(Tval){
for(unsignedinti=0; i<mrows; i++) {
for(unsignedintj=0; j<mcols; j++) {
mdata[i][j]*= val;
}
}
}
/**
*@brief overload /= operator for all p_w_picpath datadevide val individually
*/
voidoperator/=(Tval){
for(unsignedinti=0; i<mrows; i++) {
for(unsignedintj=0; j<mcols; j++) {
mdata[i][j]/= val;
}
}
}
/**
*@brief ImageData .* ImageData
*
*@return a const ImageData
*/
constImgData<T>cwiseProduct(constImgData<T>& imgData) const{
assert(mrows == imgData.rows() && mcols ==imgData.cols());
ImgData<T>temp(imgData.rows(), imgData.cols());
T** tempData = temp.data();
T** data = imgData.data();
for(unsignedinti=0; i<mrows; i++) {
for(unsignedintj=0; j<mcols; j++) {
tempData[i][j]= mdata[i][j] * data[i][j];
}
}
returntemp;
}
/**
*@brief compute sum of p_w_picpath data
*
*@return a long double type sum of p_w_picpath data
*/
longdoublesum() const {
longdoublep_w_picpathSum = 0;
for(unsignedinti=0; i<mrows; i++) {
for(unsignedintj=0; j<mcols; j++) {
p_w_picpathSum+= mdata[i][j];
}
}
returnp_w_picpathSum;
}
/**
*@brief get pointer of p_w_picpath data for changingdata's value use
*/
T** data() const {
returnmdata;
}
/**
*@brief get number rows for p_w_picpath data
*/
unsignedintrows() const {
returnmrows;
}
/**
*@brief get number cols for p_w_picpath data
*/
unsignedintcols() const {
returnmcols;
}
/**
*@brief print p_w_picpath data for validation use
*/
voidprint() const {
for(unsignedinti=0; i<mrows; i++) {
for(unsignedintj=0; j<mcols; j++) {
std::cout<< mdata[i][j] << " ";
}
std::cout<< std::endl;
}
}
};
#endif
2.2 积分图相关实现
这里提供了一个计算积分图的实现,以及提供了一个计算积分图内的一个矩形区域的和的方法。
登录后复制
#ifndef _INTEGRALIMG_H_
#define_INTEGRALIMG_H_
#include<cassert>
#include"cvImgData.h"
/**
* @briefintegral p_w_picpath in linear time
*
* @paramp_w_picpath original p_w_picpath
* @returnintegralImage integral p_w_picpath
*/
//compute an integral p_w_picpath in linear time
template<typenameT>
void buildIntegralImage(
constImgData<T>&p_w_picpath
, ImgData<T>&integralImage
){
assert(p_w_picpath.rows() == integralImage.rows());
assert(p_w_picpath.cols() == p_w_picpath.cols());
intnRows = p_w_picpath.rows();
intnCols = p_w_picpath.cols();
//O(n)complexity: however no parallel should be used
T** imgData = p_w_picpath.data();
T** integralImgData = integralImage.data();
for(inti = 0; i < nRows; i++)
for(intj = 0; j < nCols; j++){
integralImgData[i][j]= imgData[i][j];
integralImgData[i][j]+= i > 0 ? integralImgData[i-1][j] : 0;
integralImgData[i][j]+= j > 0 ? integralImgData[i][j-1] : 0;
integralImgData[i][j]-= std::min(i,j) > 0 ? integralImgData[i-1][j-1] : 0;
}
}
/**
* @brief sumup the pixels in a sub-rectangle of the p_w_picpath
*
* @paramintegralImage its integral p_w_picpath
* @param uirow index of this sub-rectangle's up left corner
* @param ujcolumn index of this sub-rectangle's up left corner
* @param irnumber of rows of this sub-rectangle
* @param jrnumber of columns of this sub-rectangle
* @returnthe sum
*/
template<typenameT>
longdoublesumImagePart(
constImgData<T>&integralImage
, intui
, intuj
, intir
, intjr
){
//rectangle'sother pair of coordinates
intdi = ui + ir- 1;
intdj = uj + jr- 1;
//standardoperation
T** integralData = integralImage.data();
longdoublesum = integralData[di][dj];
sum-= ui > 0 ? integralData[ui-1][dj]: 0;
sum-= uj > 0 ? integralData[di][uj-1]: 0;
sum+= std::min(ui, uj)> 0 ? integralData[ui-1][uj-1]: 0;
returnsum;
}
#endif
2.3 Harr特征实现
对于Harr特征的计算,这里实现了Viola-Jones算法中的5种基本模式。
登录后复制
#ifndef _HARR_H_
#define_HARR_H_
#include<iostream>
#include<fstream>
#include<vector>
#include<stack>
#include<string>
#include<cassert>
#include"cvImgData.h"
#include"cvIntegralImg.h"
usingnamespacestd;
#defineFOLLOW_VJtrue
#defineSTD_NORM_CONST1e4
#defineFLAT_THRESHOLD1
/**
* @briefcompute Haar-like features with integral p_w_picpath,
* mostly for training samples use.
*
* @paramintegralImage its integral p_w_picpath
* @paramfeatures all the features from this p_w_picpath
* @paramenforceShape the original Viola-Jones's proposal or more extensive definition
* of rectangle features (requires significantlymore memory if disabled)
*/
inlineintsumOfElement(intarray[],intk){
intret = 0;
for(inti=0; i<k; i++) {
ret+= array[i];
}
returnret;
}
voidcomputeHaarLikeFeaturesOnlyUsingIntegralImage(
ImgData<double>&p_w_picpath
, vector<double>&features
, boolenforceShape= true
){
if(features.size()> 0) {
features.resize(0);
}
//integralp_w_picpath
intnRows = p_w_picpath.rows();
intnCols = p_w_picpath.cols();
//firstnormalize the example, this might prove key to learning success
//meanvalue
floatmean = static_cast<float>(p_w_picpath.sum()/(float)(nRows*nCols));
p_w_picpath-= mean;
floatstd = static_cast<float>(sqrt(p_w_picpath.cwiseProduct(p_w_picpath).sum()/(float)(nRows*nCols)));
if(std< FLAT_THRESHOLD || _isnan(std)){
std= FLAT_THRESHOLD;
cout<< "Find one completely flat example. Don't worry.It'll be treated as an outlier presumably."<< endl;
}
//fullyuse the float range
p_w_picpath*= STD_NORM_CONST;
p_w_picpath/= std;
ImgData<double>integralImage(nRows, nCols);
buildIntegralImage(p_w_picpath,integralImage);
//twohorizontal patterns
for(intportions = 2; portions < 4; portions++){
//upleft coordinate
for(inti = 0; i < nRows; i++)
for(intj = 0; j < nCols; j++){
//howlong can l1 be
intl1MIN = portions == 2 ? 0 : 1;
intl1MAX = portions == 2 ? 0 : nCols - j;
//height
for(intir = 1; ir <= nRows - i; ir++)
for(intl1 = l1MIN; l1 <= l1MAX; l1++)
for(intl2 = 1; l2 <= nCols-j-l1; l2++)
for(intl3 = 1; l3 <= nCols-j-l1-l2; l3++){
//makethese lengths more intelligible
intlengths[3];
lengths[0]= l1;
lengths[1]= l2;
lengths[2]= l3;
//nowgo for the feature
doublefeature = 0;
if(portions== 2){
if(enforceShape&& l2 != l3)
continue;
feature+= (float)sumImagePart(integralImage, i, j,ir, l2);
feature-= (float)sumImagePart(integralImage, i, j+l2,ir, l3);
}else{
if(enforceShape&& FOLLOW_VJ && !(l1 == l3 && l1== l2))
continue;
if(enforceShape&& !FOLLOW_VJ && !(l1 == l3 && l1>= l2))
continue;
for(intk = 0; k < 3; k++){
intfactor = static_cast<int>(pow(-1.,k));
intadvance = k == 0 ? 0 : sumOfElement(lengths, k);
feature+= factor*(float)sumImagePart(integralImage, i,j+advance, ir, lengths[k]);
}
}
//store
features.push_back(feature);
}
}
}
//twovertical patterns: if you go my way, only one vertical pattern
for(intportions = 2; portions < 4; portions++){
//upleft coordinate
for(inti = 0; i < nRows; i++)
for(intj = 0; j < nCols; j++){
intl1MIN = portions == 2 ? 0 : 1;
intl1MAX = portions == 2 ? 0 : nRows - i;
//width
for(intjr = 1; jr <= nCols-j; jr++)
for(intl1 = l1MIN; l1 <= l1MAX; l1++)
for(intl2 = 1; l2 <= nRows-i-l1; l2++)
for(intl3 = 1; l3 <= nRows-i-l1-l2; l3++){
//makethese lengths more intelligible
intlengths[3];
lengths[0]= l1;
lengths[1]= l2;
lengths[2]= l3;
//nowgo for the feature
doublefeature = 0;
if(portions== 2){
if(enforceShape&& !(l2 == l3))
continue;
feature+= (float)sumImagePart(integralImage, i, j,l2, jr);
feature-= (float)sumImagePart(integralImage, i+l2, j,l3, jr);
}else{
if(enforceShape&& FOLLOW_VJ && !(l1 == l2 && l1== l3))
continue;
//if(enforceShape&& FOLLOW_VJ)
if(enforceShape&& !FOLLOW_VJ)
continue;
for(intk = 0; k < 3; k++){
intfactor = static_cast<int>(pow(-1.,k));
intadvance = k == 0 ? 0 : sumOfElement(lengths, k);
feature+= factor*(float)sumImagePart(integralImage,i+advance, j, lengths[k], jr);
}
}
//store
features.push_back(feature);
}
}
}
//amixed pattern
for(inti = 0; i < nRows; i++)
for(intj = 0; j < nCols; j++)
for(inti_l1 = 1; i_l1 < nRows - i; i_l1++)
for(inti_l2 = 1; i_l2 <= nRows - i - i_l1; i_l2++)
for(intj_l1 = 1; j_l1 < nCols - j; j_l1++)
for(intj_l2 = 1; j_l2 <= nCols - j - j_l1; j_l2++){
if(enforceShape&& !(i_l1 == i_l2 && j_l1 == j_l2))
continue;
doublefeature = 0;
feature+= (float)sumImagePart(integralImage, i, j,i_l1, j_l1);
feature-= (float)sumImagePart(integralImage, i, j +j_l1, i_l1, j_l2);
feature-= (float)sumImagePart(integralImage, i +i_l1, j, i_l2, j_l1);
feature+= (float)sumImagePart(integralImage, i + i_l1,j + j_l1, i_l2, j_l2);
//record
features.push_back(feature);
}
}
reference:
Yi-Qing Wang, An Analysis of the Viola-Jones Face Detection Algorithm, IPOL.