一、使用模板匹配算法
match.py:
import os
import Function
# Template-matching digit recognition script.
#
# Step 1: compress every training image into a 49-character '0'/'1'
# signature and store one "<name>:<signature>" line per image in
# digits/picture7_7.txt.
root_dir = "digits/train2"
with open("digits/picture7_7.txt", 'w+') as file7_7:
    for fl in os.listdir(root_dir):
        # fl[0:-4] drops the last four characters of the file name
        # (presumably a ".xxx" extension -- confirm against the data set).
        img_str = fl[0:-4] + ":" + Function.image_compression(root_dir + '/' + fl)
        file7_7.write(img_str + '\n')

# Step 2: load the template library once instead of re-reading the file for
# every test image. The slices assume each line ends with ":" + 49 signature
# characters + "\n", which is what step 1 writes.
templates = []
with open("digits/picture7_7.txt", 'r') as file7_7:
    for line in file7_7:
        templates.append((line[0:-51], line[-50:-1]))

# Step 3: classify every test image by its nearest template(s).
# Per-digit counters, indexed by the true digit (first character of the
# test file name).
root_dir = "digits/test"
Correct_rate = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Error_rate = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Rejection_rate = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
for fl in os.listdir(root_dir):
    tie_count = 0          # templates tied with the current best one
    tie_conflict = False   # True once a tied template has another class
    min_dist = 7           # sqrt(49): largest distance two signatures can have
    dist_img = ""          # name of the current best template
    test_img_str = Function.image_compression(root_dir + '/' + fl)
    for template_name, train_str in templates:
        temp_dist = Function.distance(test_img_str, train_str)
        if temp_dist < min_dist:
            # Strictly better match: forget all earlier ties.
            min_dist = temp_dist
            dist_img = template_name
            tie_count = 0
            tie_conflict = False
        elif temp_dist == min_dist:
            tie_count += 1
            # A disagreement must stick. Previously a later tie of the same
            # class silently cleared an earlier conflict, so the outcome
            # depended on directory listing order.
            if dist_img[0:1] != template_name[0:1]:
                tie_conflict = True

    true_digit = int(fl[0])
    if not dist_img or tie_conflict:
        # Either no usable template was found, or the nearest templates
        # belong to different classes: refuse to decide.
        print("测试数字:", fl[0:-4], " -- 该数字拒绝识别!")
        Rejection_rate[true_digit] += 1
    elif tie_count == 0:
        # Exactly one nearest template.
        print("测试数字:", fl[0:-4], " -- 识别出来的结果:", dist_img)
        if fl[0] == dist_img[0]:
            Correct_rate[true_digit] += 1
        else:
            Error_rate[true_digit] += 1
    else:
        # Several nearest templates, all of the same class.
        same_class = int(dist_img[0])
        print("测试数字:", fl[0:-4], " -- 识别出来的结果(类):", same_class)
        if true_digit == same_class:
            Correct_rate[true_digit] += 1
        else:
            Error_rate[true_digit] += 1

print("------------------------------------------------")
for i in range(10):
    # Derive the percentage scale from the number of images actually seen
    # for this digit instead of assuming exactly 20 per digit (the old "* 5").
    total = Correct_rate[i] + Error_rate[i] + Rejection_rate[i]
    scale = 100.0 / total if total else 0.0
    print("数字 {:d} 识别的正确率 = {:.2f}% ,错误率 = {:.2f}% ,拒绝识别率 = {:.2f}%".format(i, Correct_rate[i] * scale, Error_rate[i] * scale,
                                                                         Rejection_rate[i] * scale))
print("成功!")
Function.py:
import cv2
def image_compression(img_path):
    """Compress an image into a 49-character string of '0'/'1' flags.

    The top-left 28x28 pixels are split into a 7x7 grid of 4x4 tiles, visited
    row by row. A tile yields '1' when at least 13 of its 16 pixels have a
    channel-0 value above 127, otherwise '0'.

    Args:
        img_path: path of the image file, passed to ``cv2.imread``.

    Returns:
        A string of 49 characters, one per tile.

    Raises:
        ValueError: if ``cv2.imread`` could not load the image.
        IndexError: if the image is smaller than 28x28 pixels.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable file by returning None, not by
        # raising; fail here with a message that names the file.
        raise ValueError("cannot read image: " + str(img_path))

    flags = []
    for top in range(0, 28, 4):
        for left in range(0, 28, 4):
            bright = 0
            for i in range(4):
                for j in range(4):
                    # Index from the tile origin. The previous "- 1" offset
                    # produced index -1 on the first tile row/column, which
                    # wrapped around to the opposite edge of the image.
                    if img[top + i][left + j][0] > 127:
                        bright += 1
            flags.append('1' if bright >= 13 else '0')
    return "".join(flags)
def distance(test_str, train_str):
    """Return the Euclidean distance between two digit strings.

    Each character is read as one integer coordinate; the comparison covers
    the first ``len(train_str)`` positions.

    Args:
        test_str: digit string of the sample; must be at least as long as
            ``train_str``.
        train_str: digit string of the template.

    Returns:
        The distance as a float (0.0 for an empty ``train_str``).
    """
    squared = 0.0
    for i in range(len(train_str)):
        # int() replaces eval(): same value for digit characters, without
        # evaluating file content as Python code.
        squared += (int(test_str[i]) - int(train_str[i])) ** 2
    return squared ** 0.5
二、k近邻算法实现
k.py:
import os
import numpy as np
import Function2
def handwritingClassficationTest():
    """Evaluate the 3-nearest-neighbour digit classifier and print the results.

    Every file in ``digits/trainingDigits`` becomes one 1024-value training
    row; its label is the integer before the first ``_`` of the file name.
    Every file in ``digits/testDigits`` is then classified with
    ``Function2.classify0`` (k=3) and compared with the label taken from its
    own file name. One line is printed per test file, followed by the error
    count and the accuracy.
    """
    hwLabels = []
    trainingFileList = os.listdir('digits/trainingDigits')
    m = len(trainingFileList)
    trainingMat = np.zeros((m, 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i, :] = Function2.img2vector('digits/trainingDigits/%s' % fileNameStr)

    testFileList = os.listdir('digits/testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    for fileNameStr in testFileList:
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = Function2.img2vector('digits/testDigits/%s' % fileNameStr)
        classifierResult = Function2.classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        # NOTE(review): classifierResult is printed as returned by classify0;
        # only its first element is compared with the true label below.
        print("测试出的数字是: %s, 正确的数字是: %s" % (classifierResult, classNumStr))
        if classifierResult[0] != classNumStr:
            errorCount += 1.0

    print("\n 识别错误的个数是: %s" % errorCount)
    if mTest == 0:
        # Accuracy is undefined without test samples; report that instead of
        # failing with ZeroDivisionError.
        print("\n 测试集为空, 无法计算正确率")
        return
    print("\n 正确率为: %f" % (1 - errorCount / float(mTest)))
# Run the evaluation immediately when this file is executed or imported.
handwritingClassficationTest()
Function2.py:
from os import listdir
from numpy import *
import operator
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest training rows.

    Distances are Euclidean, computed between ``inX`` and every row of
    ``dataSet``.

    Args:
        inX: feature vector to classify, either 1-D or a single-row 2-D array.
        dataSet: 2-D array with one training sample per row.
        labels: sequence where ``labels[i]`` is the class of ``dataSet`` row i.
        k: number of nearest rows that vote. Values larger than the number of
            rows are reduced to the number of rows.

    Returns:
        A ``(label, votes)`` tuple for the label with the most votes. When
        several labels have the same number of votes, the one whose first
        voter is nearest wins.

    Raises:
        ValueError: if ``dataSet`` has no rows or ``k`` is smaller than 1.
    """
    dataSetSize = dataSet.shape[0]
    if dataSetSize == 0 or k < 1:
        raise ValueError("classify0 needs a non-empty dataSet and k >= 1")
    if k > dataSetSize:
        # Asking for more neighbours than samples used to raise IndexError.
        k = dataSetSize

    # Broadcasting subtracts inX from every row without building the
    # dataSetSize-times repeated copy that tile() created.
    diffMat = inX - dataSet
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    # A stable sort keeps equally distant rows in data-set order, so the
    # chosen neighbours do not depend on the sort implementation.
    sortedDistIndicies = distances.argsort(kind='stable')

    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # sorted() is stable, so among equal vote counts the label inserted first
    # (i.e. seen at the nearest neighbour) stays in front.
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0]
def img2vector(filename):
    """Read a 32x32 text bitmap into a 1x1024 vector.

    The first 32 characters of each of the first 32 lines are converted with
    ``int`` and stored row by row.

    Args:
        filename: path of the text file to read.

    Returns:
        An array of shape (1, 1024) holding the converted characters.

    Raises:
        IndexError: if a line has fewer than 32 characters or the file has
            fewer than 32 lines.
        ValueError: if a character is not a digit.
    """
    returnVect = zeros((1, 1024))
    # The context manager closes the file even when parsing fails; the
    # handle was previously never closed.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect
def identify(image):
    """Classify a 32x32 image with the 3-nearest-neighbour classifier.

    Args:
        image: indexable as ``image[row][col]`` for 32 rows and 32 columns;
            every element must be convertible with ``int``.

    Returns:
        A dict with the first element of the ``classify0`` result under
        ``'result1'`` and the second element under ``'result2'``.
    """
    # Flatten the image row by row into a single 1x1024 feature row.
    flat_image = zeros((1, 1024))
    for offset in range(1024):
        flat_image[0, offset] = int(image[offset // 32][offset % 32])

    # Rebuild the training matrix from the files on disk; the label of each
    # sample is the integer before the first '_' of its file name.
    file_names = listdir('digits/trainingDigits')
    sample_labels = []
    samples = zeros((len(file_names), 1024))
    for row, file_name in enumerate(file_names):
        stem = file_name.split('.')[0]
        sample_labels.append(int(stem.split('_')[0]))
        samples[row, :] = img2vector('digits/trainingDigits/%s' % file_name)

    outcome = classify0(flat_image[0], samples, sample_labels, 3)
    return {'result1': outcome[0], 'result2': outcome[1]}
其中,模板匹配算法使用 28×28 像素的手写数字图像数据集,k近邻算法使用 32×32 的二进制文本手写数字数据集。