EasyPR源码剖析(9):字符识别

在上一篇文章的介绍中,我们已经通过相应的字符分割方法,将车牌区域进行分割,得到7个分割字符图块,接下来要做的就是将字符图块放入训练好的神经网络模型,通过模型来预测每个图块所表示的具体字符。神经网络的介绍和训练过程我们将在下一节中具体介绍,本节主要介绍字符特征的提取,和如何通过训练好的神经网络模型来进行字符的识别。

字符识别主要是通过 类CharsIdentify 来进行,对于中文字符和非中文字符,分别采取了不同的策略,训练得到的ANN模型也不一样,中文字符的识别主要使用 identifyChinese 来处理,非中文字符的识别主要采用 identify 来处理。另外,类CharsIdentify采用了单例模式,具体的初始化代码和构造函数如下:

   // Shared instance pointer; stays null until instance() is first called.
   CharsIdentify* CharsIdentify::instance_ = nullptr;

   // Returns the shared CharsIdentify object, creating it on first use.
   // No synchronization is performed here, so concurrent first calls can race.
   CharsIdentify* CharsIdentify::instance() {
     if (instance_ == nullptr) {
       instance_ = new CharsIdentify;
     }
     return instance_;
   }

   // Loads the default (non-Chinese) and the Chinese ANN models from their
   // default paths, then loads the key/value table at "etc/province_mapping".
   CharsIdentify::CharsIdentify() {
     ann_ = ml::ANN_MLP::load<ml::ANN_MLP>(kDefaultAnnPath);
     annChinese_ = ml::ANN_MLP::load<ml::ANN_MLP>(kChineseAnnPath);
     kv_ = std::shared_ptr<Kv>(new Kv);
     kv_->load("etc/province_mapping");
   }

   // Replaces the non-Chinese ANN model with the one stored at `path`.
   // Does nothing when `path` equals the default model path.
   void CharsIdentify::LoadModel(std::string path) {
     if (path == std::string(kDefaultAnnPath)) {
       return;
     }
     // Drop the state of the currently held model before swapping it out.
     if (!ann_->empty()) {
       ann_->clear();
     }
     ann_ = ml::ANN_MLP::load<ml::ANN_MLP>(path);
   }

   // Replaces the Chinese ANN model with the one stored at `path`.
   // Does nothing when `path` equals the default Chinese model path.
   void CharsIdentify::LoadChineseModel(std::string path) {
     if (path == std::string(kChineseAnnPath)) {
       return;
     }
     // Drop the state of the currently held model before swapping it out.
     if (!annChinese_->empty()) {
       annChinese_->clear();
     }
     annChinese_ = ml::ANN_MLP::load<ml::ANN_MLP>(path);
   }

这里的单例模式只考虑了单线程的情况;若要在多线程环境下使用,需要加入双重检查锁定(double-checked locking)等同步机制。此处针对中文字符和非中文字符分别加载了不同的 ANN 模型文件,ANN 模型通过 OpenCV 机器学习模块自带的神经网络类 ml::ANN_MLP 来实现。

字符特征获取

通过神经网络对字符图块进行判别,首先需要获取字符图块的特征,字符特征的获取,主要通过 charFeatures 函数来实现。具体的函数代码如下所示:

 // Builds the 1 x N CV_32F feature row for one character image:
 // the per-column projection histogram, the per-row projection histogram, and
 // then the raw pixel values of the image resized to sizeData x sizeData
 // (N = sizeData + sizeData + sizeData * sizeData).
 // NOTE(review): pixels are read as unsigned char, so `in` is presumably a
 // single-channel 8-bit image -- confirm against the callers.
 Mat charFeatures(Mat in, int sizeData) {
   // Selector handed to ProjectedHistogram: 0 counts per column, non-zero per row.
   const int VERTICAL = 0;
   const int HORIZONTAL = 1;

   // Cut out the character and centre it; the original author notes this
   // affects accuracy by about 5%.
   Rect _rect = GetCenterRect(in);
   Mat tmpIn = CutTheRect(in, _rect);
   // Mat tmpIn = in.clone();

   // Low-resolution copy of the centred character.
   Mat lowData;
   resize(tmpIn, lowData, Size(sizeData, sizeData));

   // Histogram features.
   Mat vhist = ProjectedHistogram(lowData, VERTICAL);
   Mat hhist = ProjectedHistogram(lowData, HORIZONTAL);

   // Both histograms plus one value per pixel of the low-resolution image.
   int numCols = vhist.cols + hhist.cols + lowData.cols * lowData.cols;

   Mat out = Mat::zeros(1, numCols, CV_32F);

   // Copy the three feature groups into the output row, one after another.
   int j = 0;
   for (int i = 0; i < vhist.cols; i++) {
     out.at<float>(j) = vhist.at<float>(i);
     j++;
   }
   for (int i = 0; i < hhist.cols; i++) {
     out.at<float>(j) = hhist.at<float>(i);
     j++;
   }
   // The index order (x, y) is kept exactly as in the original code. lowData
   // is square, so the access stays in bounds.
   // NOTE(review): changing the order would change the feature layout the
   // trained models expect -- do not "fix" it without retraining.
   for (int x = 0; x < lowData.cols; x++) {
     for (int y = 0; y < lowData.rows; y++) {
       out.at<float>(j) += (float)lowData.at<unsigned char>(x, y);
       j++;
     }
   }

   // std::cout << out << std::endl;

   return out;
 }

对于中文字符和英文字符,默认的图块大小是不一样的,中文字符默认是 20*20,非中文默认是10*10。

  • GetCenterRect 函数主要用于获取字符的外接边框:分别从上、下、左、右四个方向向内扫描,查找字符像素最先出现的位置;
  • CutTheRect 函数裁剪原图,即将字符移动到图像的中间位置,通过这一步的操作,可将字符识别的准确率提高5%左右;
  • ProjectedHistogram 函数用于获取归一化序列,归一化到0-1区间范围内;

GetCenterRect 函数具体代码如下:

 // Returns the bounding rectangle of all pixels in `in` brighter than 20.
 // Each side is found by scanning inwards from that edge until the first
 // pixel above the threshold is met. A side with no such pixel keeps its
 // default, so an all-dark image yields the full image rectangle.
 // NOTE(review): pixels are addressed as one byte per column through
 // in.data / in.step[0], so `in` is presumably single-channel 8-bit -- confirm.
 Rect GetCenterRect(Mat &in) {
   Rect _rect;

   int top = 0;
   int bottom = in.rows - 1;

   // First row, from the top, containing a bright pixel.
   for (int i = 0; i < in.rows; ++i) {
     bool bFind = false;
     for (int j = 0; j < in.cols; ++j) {
       if (in.data[i * in.step[0] + j] > 20) {
         top = i;
         bFind = true;
         break;
       }
     }
     if (bFind) {
       break;
     }
   }

   // First row, from the bottom, containing a bright pixel.
   for (int i = in.rows - 1; i >= 0; --i) {
     bool bFind = false;
     for (int j = 0; j < in.cols; ++j) {
       if (in.data[i * in.step[0] + j] > 20) {
         bottom = i;
         bFind = true;
         break;
       }
     }
     if (bFind) {
       break;
     }
   }

   int left = 0;
   int right = in.cols - 1;

   // First column, from the left, containing a bright pixel.
   for (int j = 0; j < in.cols; ++j) {
     bool bFind = false;
     for (int i = 0; i < in.rows; ++i) {
       if (in.data[i * in.step[0] + j] > 20) {
         left = j;
         bFind = true;
         break;
       }
     }
     if (bFind) {
       break;
     }
   }

   // First column, from the right, containing a bright pixel.
   for (int j = in.cols - 1; j >= 0; --j) {
     bool bFind = false;
     for (int i = 0; i < in.rows; ++i) {
       if (in.data[i * in.step[0] + j] > 20) {
         right = j;
         bFind = true;
         break;
       }
     }
     if (bFind) {
       break;
     }
   }

   // Both bounds are inclusive, hence the +1 on width and height.
   _rect.x = left;
   _rect.y = top;
   _rect.width = right - left + 1;
   _rect.height = bottom - top + 1;

   return _rect;
 }

CutTheRect 函数具体代码如下:

 // Copies the `rect` region of `in` into the centre of a new black
 // in.cols x in.cols CV_8UC1 image and returns that image.
 // NOTE(review): no bounds checks are done; this assumes `rect` lies inside
 // `in` and is no larger than in.cols in either dimension -- confirm.
 Mat CutTheRect(Mat &in, Rect &rect) {
   int size = in.cols;  // (rect.width>rect.height)?rect.width:rect.height;
   Mat dstMat(size, size, CV_8UC1);
   dstMat.setTo(Scalar(0, 0, 0));

   // Top-left corner at which the region must land to be centred.
   int x = (int) floor((float) (size - rect.width) / 2.0f);
   int y = (int) floor((float) (size - rect.height) / 2.0f);

   // Byte-wise copy of the region, row by row.
   for (int i = 0; i < rect.height; ++i) {
     for (int j = 0; j < rect.width; ++j) {
       dstMat.data[dstMat.step[0] * (i + y) + j + x] =
           in.data[in.step[0] * (i + rect.y) + j + rect.x];
     }
   }

   return dstMat;
 }

ProjectedHistogram 函数代码如下:

 // Counts the elements of a single-row or single-column matrix whose byte
 // value is greater than iValue. A matrix with more than one row is walked
 // down its first column; otherwise its first row is walked.
 float countOfBigValue(Mat &mat, int iValue) {
   float iCount = 0.0;
   if (mat.rows > 1) {
     // Column vector: consecutive elements are one row stride apart.
     for (int i = 0; i < mat.rows; ++i) {
       if (mat.data[i * mat.step[0]] > iValue) {
         iCount += 1.0;
       }
     }
     return iCount;
   } else {
     // Row vector: elements are contiguous bytes.
     for (int i = 0; i < mat.cols; ++i) {
       if (mat.data[i] > iValue) {
         iCount += 1.0;
       }
     }
     return iCount;
   }
 }

 // Returns a 1 x sz CV_32F projection histogram of `img`.
 // With t == 0 each entry is the number of pixels above 20 in one column;
 // with t != 0 it is the count for one row. The histogram is then divided by
 // its maximum, so values lie in [0, 1]; an all-zero histogram is returned
 // unchanged.
 Mat ProjectedHistogram(Mat img, int t) {
   int sz = (t) ? img.rows : img.cols;
   Mat mhist = Mat::zeros(1, sz, CV_32F);

   for (int j = 0; j < sz; j++) {
     Mat data = (t) ? img.row(j) : img.col(j);
     mhist.at<float>(j) = countOfBigValue(data, 20);
   }

   // Normalize histogram
   double min, max;
   minMaxLoc(mhist, &min, &max);

   // Scale into the 0-1 range; skipped when max is 0 to avoid dividing by zero.
   if (max > 0)
     mhist.convertTo(mhist, -1, 1.0f / max, 0);

   return mhist;
 }

通过上述代码可知,非中文字符和中文字符获得的字符特征个数是不同的,非中文字符features个数为 10+10+10*10=120,中文字符features个数为  20+20+20*20=440。

字符识别

通过上述函数获取字符特征之后,可以通过神经网络模型对车牌字符进行识别,具体的识别函数如下所示:

   // Runs ann_ on the feature row `f` and returns the index of the highest
   // scoring output within one index range:
   //   - isChinses == false: indices [0, kCharactersNumber)
   //   - isChinses == true : indices [kCharactersNumber, kCharsTotalNumber)
   // `maxVal` receives the winning score. If no output exceeds the initial
   // value of -2, the first index of the selected range is returned.
   // NOTE(review): -2 presumably lies below every possible network output --
   // confirm against the activation function used by the trained model.
   int CharsIdentify::classify(cv::Mat f, float& maxVal, bool isChinses){
     int result = -1;

     cv::Mat output(1, kCharsTotalNumber, CV_32FC1);
     ann_->predict(f, output);

     maxVal = -2.f;
     if (!isChinses) {
       result = 0;
       for (int j = 0; j < kCharactersNumber; j++) {
         float val = output.at<float>(j);
         // std::cout << "j:" << j << "val:" << val << std::endl;
         if (val > maxVal) {
           maxVal = val;
           result = j;
         }
       }
     }
     else {
       result = kCharactersNumber;
       for (int j = kCharactersNumber; j < kCharsTotalNumber; j++) {
         float val = output.at<float>(j);
         // std::cout << "j:" << j << "val:" << val << std::endl;
         if (val > maxVal) {
           maxVal = val;
           result = j;
         }
       }
     }
     // std::cout << "maxVal:" << maxVal << std::endl;
     return result;
   }

ann_ 为之前加载得到的神经网络模型,直接调用其 predict() 函数即可得到输出矩阵 output,输出矩阵中最大值所在的下标即为识别出的字符类别。下标的取值范围为 0-64,共 65 个类别,各下标对应的字符如下所示:

// Label table indexed by class id: 10 digits, then 24 upper-case letters
// (I and O are left out), then 31 "zh_*" keys -- 65 entries in total.
static const char *kChars[] = {
  "0", "1", "2",
  "3", "4", "5",
  "6", "7", "8",
  "9",
  /* 10 */
  "A", "B", "C",
  "D", "E", "F",
  "G", "H", /* {"I", "I"} */
  "J", "K", "L",
  "M", "N", /* {"O", "O"} */
  "P", "Q", "R",
  "S", "T", "U",
  "V", "W", "X",
  "Y", "Z",
  /* 24 */
  "zh_cuan" , "zh_e" , "zh_gan" ,
  "zh_gan1" , "zh_gui" , "zh_gui1" ,
  "zh_hei" , "zh_hu" , "zh_ji" ,
  "zh_jin" , "zh_jing" , "zh_jl" ,
  "zh_liao" , "zh_lu" , "zh_meng" ,
  "zh_min" , "zh_ning" , "zh_qing" ,
  "zh_qiong", "zh_shan" , "zh_su" ,
  "zh_sx" , "zh_wan" , "zh_xiang",
  "zh_xin" , "zh_yu" , "zh_yu1" ,
  "zh_yue" , "zh_yun" , "zh_zang" ,
  "zh_zhe"
  /* 31 */
};

其中26个英文字母中,因为I 和 O容易和数字的 1和0 混淆,因此被去除了,后面31个中文字符分别对应中国的31个行政区域(港澳台暂不考虑)。将识别的各个字符整体输出,就得到了最终的结果。

上一篇:雾霾天出行,如何精确避开“雷区”?2016 SODA数据侠十强


下一篇:基于原生JS的jsonp方法的实现