基于opencv实现的手写数字识别

一、使用模板匹配算法

match.py:

import os
import Function

# Step 1: compress every training image into a 0/1 template string and store
# one "<file name without its 4-character extension>:<template>" record per
# line. The `with` block guarantees the file is flushed and closed even if an
# image fails to load.
root_dir = "digits/train2"
with open("digits/picture7_7.txt", 'w') as file7_7:
    for fl in os.listdir(root_dir):
        img_str = fl[0:-4] + ":" + Function.image_compression(root_dir + '/' + fl)
        file7_7.write(img_str + '\n')

# Step 2: load all template records once instead of re-reading the file from
# disk for every test image.
with open("digits/picture7_7.txt", 'r') as file7_7:
    template_lines = file7_7.readlines()

root_dir = "digits/test"
# Per-digit counters, indexed by the true digit (first character of the test
# file name).
Correct_rate = [0] * 10
Error_rate = [0] * 10
Rejection_rate = [0] * 10

for fl in os.listdir(root_dir):
    Same_dist_number = 0  # number of templates tied with the current best
    Same_class = -1       # class shared by the tie, or -1 when they disagree
    min_dist = 7          # sqrt(49): the largest distance two templates can have
    dist_img = ""         # name of the best-matching training image so far
    test_img_str = Function.image_compression(root_dir + '/' + fl)
    for line in template_lines:
        # Record layout: "<name>:<49 flags>\n" -> the flags are the last
        # 49 characters before the newline, the name is everything before ':'.
        train_str = line[-50:-1]
        temp_dist = Function.distance(test_img_str, train_str)
        if temp_dist < min_dist:
            min_dist = temp_dist
            dist_img = line[0:-51]
            Same_dist_number = 0
            Same_class = -1
        elif temp_dist == min_dist:
            Same_dist_number += 1
            # NOTE(review): only the most recent tie decides Same_class, so an
            # earlier tie with a different class can be overwritten by a later
            # tie with the same class. Confirm whether any disagreeing tie
            # should force a rejection.
            if dist_img[0:1] == line[0:1]:
                Same_class = int(line[0:1])
            else:
                Same_class = -1

    # The true label is the first character of the test file name; int() parses
    # it without evaluating file-name text as code.
    true_digit = int(fl[0])
    if Same_dist_number == 0:
        # A unique nearest template: accept its class.
        print("测试数字:", fl[0:-4], "  --  识别出来的结果:", dist_img)
        if fl[0] == dist_img[0]:
            Correct_rate[true_digit] += 1
        else:
            Error_rate[true_digit] += 1
    elif Same_class == -1:
        # Tied templates disagree on the class: refuse to classify.
        print("测试数字:", fl[0:-4], "  --  该数字拒绝识别!")
        Rejection_rate[true_digit] += 1
    else:
        # Tied templates agree on one class.
        print("测试数字:", fl[0:-4], "  --  识别出来的结果(类):", Same_class)
        if true_digit == Same_class:
            Correct_rate[true_digit] += 1
        else:
            Error_rate[true_digit] += 1

print("------------------------------------------------")
for i in range(10):
    # Derive the percentage from the number of test images actually seen for
    # this digit (equals the former fixed factor 5 when there are 20 of them);
    # a digit without test images is reported as 0%.
    total = Correct_rate[i] + Error_rate[i] + Rejection_rate[i]
    percent = 100 / total if total else 0
    print("数字 {:d} 识别的正确率 = {:.2f}% ,错误率 = {:.2f}% ,拒绝识别率 = {:.2f}%".format(
        i, Correct_rate[i] * percent, Error_rate[i] * percent, Rejection_rate[i] * percent))
print("成功!")

Function.py:

import cv2


def image_compression(img_path):
    """Compress a 28x28 image into a 49-character string of '0'/'1' flags.

    The top-left 28x28 region is split into a 7x7 grid of 4x4 tiles that is
    scanned row by row. A tile produces '1' when at least 13 of its 16 pixels
    have a first-channel value greater than 127, otherwise '0'.

    Args:
        img_path: Path of the image file, passed to ``cv2.imread``.

    Returns:
        A string of 49 characters, each '0' or '1', in row-major tile order.

    Raises:
        ValueError: If the image cannot be read or is smaller than 28x28.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError("cannot read image: %s" % img_path)
    if img.shape[0] < 28 or img.shape[1] < 28:
        raise ValueError("image must be at least 28x28 pixels: %s" % img_path)

    flags = []
    for row in range(0, 28, 4):
        for col in range(0, 28, 4):
            # Tiles start exactly at (row, col). The previous `- 1` offset
            # shifted every tile by one pixel and made the first row/column
            # wrap around to index -1, i.e. the opposite edge of the image.
            tile = img[row:row + 4, col:col + 4, 0]
            bright = int((tile > 127).sum())
            flags.append('1' if bright >= 13 else '0')
    return "".join(flags)


def distance(test_str, train_str):
    """Return the Euclidean distance between two digit strings.

    Each character is read as a decimal digit and the strings are compared
    position by position over the length of ``train_str``; any extra trailing
    characters of ``test_str`` are ignored.

    Args:
        test_str: Digit string of the sample under test.
        train_str: Digit string of the training template.

    Returns:
        The distance as a float (0.0 for identical or empty strings).

    Raises:
        ValueError: If ``test_str`` is shorter than ``train_str`` or a
            compared character is not a digit.
    """
    if len(test_str) < len(train_str):
        raise ValueError("test_str is shorter than train_str")
    # int() parses the digits directly; the characters are never evaluated as
    # Python code. zip() stops at the end of train_str.
    squared = sum((int(a) - int(b)) ** 2 for a, b in zip(test_str, train_str))
    return float(squared) ** 0.5
 

二、k近邻(kNN)算法实现

k.py:

import os
import numpy as np
import Function2


def handwritingClassficationTest():
    """Evaluate the classifier on the test digits and print the results.

    Every file in ``digits/trainingDigits`` is loaded into one row of a
    1024-column matrix; its label is the integer before the first ``_`` in
    the file name. Each file in ``digits/testDigits`` is then classified with
    k = 3, the prediction and the expected label are printed, and finally the
    error count and the accuracy are reported.
    """
    # Build the training matrix and the matching label list.
    train_names = os.listdir('digits/trainingDigits')
    train_labels = []
    train_matrix = np.zeros((len(train_names), 1024))
    for row, name in enumerate(train_names):
        stem = name.split('.')[0]
        train_labels.append(int(stem.split('_')[0]))
        train_matrix[row, :] = Function2.img2vector('digits/trainingDigits/%s' % name)

    # Classify each test sample and count the mismatches.
    test_names = os.listdir('digits/testDigits')
    mistakes = 0.0
    for name in test_names:
        expected = int(name.split('.')[0].split('_')[0])
        sample = Function2.img2vector('digits/testDigits/%s' % name)
        prediction = Function2.classify0(sample, train_matrix, train_labels, 3)
        print("测试出的数字是: %s, 正确的数字是: %s" % (prediction, expected))
        # Element 0 of the classifier result is compared with the true label.
        if prediction[0] != expected:
            mistakes += 1.0

    print("\n 识别错误的个数是: %s" % mistakes)
    print("\n 正确率为: %f" % (1 - mistakes / float(len(test_names))))


# Run the evaluation immediately when this script is executed.
handwritingClassficationTest()
 

Function2.py:

from os import listdir
from numpy import *
import operator


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest training rows.

    Args:
        inX: Feature vector to classify; must broadcast against the rows of
            ``dataSet``.
        dataSet: 2-D array with one training sample per row.
        labels: Sequence of labels, ``labels[i]`` belonging to ``dataSet[i]``.
        k: Number of nearest neighbours that vote. Values larger than the
            number of training rows are reduced to that number.

    Returns:
        A ``(label, votes)`` tuple for the label with the most votes. On a
        tie, the label that was met first (i.e. from the nearer neighbour)
        wins.

    Raises:
        IndexError: If no neighbour can vote (empty ``dataSet`` or ``k`` < 1).
    """
    dataSetSize = dataSet.shape[0]
    # Asking for more neighbours than exist used to raise IndexError; use
    # every available row instead.
    neighbours = k if k < dataSetSize else dataSetSize

    # Broadcasting subtracts inX from every row without building a tiled copy.
    diffMat = inX - dataSet
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    sortedDistIndicies = distances.argsort()

    # Tally votes nearest-first so that insertion order breaks ties.
    classCount = {}
    for i in range(neighbours):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0]


def img2vector(filename):
    """Load a 32x32 text image of digits into a 1x1024 row vector.

    The first 32 characters of each of the first 32 lines are converted with
    ``int`` and stored in row-major order.

    Args:
        filename: Path of the text file to read.

    Returns:
        An array of shape (1, 1024) holding the parsed values.

    Raises:
        IndexError: If a line has fewer than 32 characters (including when
            the file has fewer than 32 lines).
        ValueError: If a character is not a digit.
    """
    returnVect = zeros((1, 1024))
    # The context manager closes the file; it was previously left open.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])

    return returnVect


def identify(image):
    """Classify a 32x32 image against the stored training digits.

    The image is flattened into a 1024-element vector, the whole training set
    under ``digits/trainingDigits`` is loaded (label = integer before the
    first ``_`` in each file name), and the vector is classified with k = 3.

    Args:
        image: Indexable as ``image[i][j]`` for i, j in 0..31; every entry
            must be convertible with ``int``.

    Returns:
        A dict with ``'result1'`` and ``'result2'`` holding elements 0 and 1
        of the ``classify0`` result (the winning label and its vote count).
    """
    # Flatten the image row by row.
    sample = zeros((1, 1024))
    for row in range(32):
        pixels = image[row]
        for col in range(32):
            sample[0, row * 32 + col] = int(pixels[col])

    # Load every training file into one matrix row and remember its label.
    train_names = listdir('digits/trainingDigits')
    train_labels = []
    train_matrix = zeros((len(train_names), 1024))
    for idx, name in enumerate(train_names):
        stem = name.split('.')[0]
        train_labels.append(int(stem.split('_')[0]))
        train_matrix[idx, :] = img2vector('digits/trainingDigits/%s' % name)

    winner = classify0(sample[0], train_matrix, train_labels, 3)
    return {'result1': winner[0], 'result2': winner[1]}
其中,模板匹配使用 28×28 像素的手写数字图片数据集;k近邻算法使用以 32×32 的 0/1 字符文本保存的二值化手写数字数据集。

上一篇:Rust 里 String,str,Vec,Vec 相互转换【Conversion between String, str, Vec,


下一篇:cmake构建和链接静态库与动态库