三层神经网络,训练0到9十个数字并测试:
"""Three-layer neural network trained and tested on the ten MNIST digits 0-9."""
import time

import numpy
import scipy.special


class NeuralNetwork:
    """Fully connected network with one hidden layer and sigmoid activations.

    Attributes set by ``__init__``: ``inodes``/``hnodes``/``onodes`` (layer
    sizes), ``lr`` (learning rate), ``wih`` (input->hidden weights, shape
    ``(hnodes, inodes)``), ``who`` (hidden->output weights, shape
    ``(onodes, hnodes)``) and ``activation_function`` (the logistic sigmoid).
    """

    def __init__(self, inputnodes, hiddenodes, outputnodes, learningrate):
        """Store the layer sizes and learning rate and draw random weights."""
        self.inodes = inputnodes
        self.hnodes = hiddenodes
        self.onodes = outputnodes
        self.lr = learningrate

        # Weights are sampled from a zero-centred normal distribution whose
        # standard deviation is (number of nodes in the receiving layer)^-0.5.
        self.wih = numpy.random.normal(
            0.0, pow(self.hnodes, -0.5), (self.hnodes, self.inodes))
        self.who = numpy.random.normal(
            0.0, pow(self.onodes, -0.5), (self.onodes, self.hnodes))

        # Logistic sigmoid.
        self.activation_function = scipy.special.expit

    def _forward(self, inputs):
        """Return ``(hidden_outputs, final_outputs)`` for a column of inputs."""
        hidden_outputs = self.activation_function(numpy.dot(self.wih, inputs))
        final_outputs = self.activation_function(
            numpy.dot(self.who, hidden_outputs))
        return hidden_outputs, final_outputs

    def train(self, inputs_list, targets_list):
        """Run one gradient step on a single (inputs, targets) sample.

        Both arguments are flat sequences; they are turned into column
        vectors before use. The weights are updated in place.
        """
        inputs = numpy.array(inputs_list, ndmin=2).T
        targets = numpy.array(targets_list, ndmin=2).T
        hidden_outputs, final_outputs = self._forward(inputs)

        output_errors = targets - final_outputs
        # The hidden-layer error must be derived from the weights as they
        # were during the forward pass, i.e. before ``who`` is updated.
        hidden_errors = numpy.dot(self.who.T, output_errors)

        self.who += self.lr * numpy.dot(
            output_errors * final_outputs * (1.0 - final_outputs),
            hidden_outputs.T)
        self.wih += self.lr * numpy.dot(
            hidden_errors * hidden_outputs * (1.0 - hidden_outputs),
            inputs.T)

    def query(self, inputs_list):
        """Return the output-layer activations as an ``(onodes, 1)`` array."""
        inputs = numpy.array(inputs_list, ndmin=2).T
        return self._forward(inputs)[1]


# Node counts of the input, hidden and output layers.
input_nodes = 784
hidden_nodes = 100
output_nodes = 10

# Learning rate.
learning_rate = 0.2


def load_records(path):
    """Return the non-blank lines of the CSV file at *path*."""
    with open(path) as data_file:
        return [line for line in data_file if line.strip()]


def parse_record(record):
    """Split one CSV line into ``(label, inputs)``.

    The first field is returned unchanged as the label string; the remaining
    fields are pixel values that are scaled from 0..255 into 0.01..1.0.
    """
    label, *pixels = record.strip().split(",")
    # numpy.asfarray was removed in NumPy 2.0; asarray with an explicit
    # float dtype performs the same conversion.
    inputs = numpy.asarray(pixels, dtype=float) / 255.0 * 0.99 + 0.01
    return label, inputs


def build_targets(index, size):
    """Return a length-*size* target vector: 0.99 at *index*, 0.01 elsewhere."""
    targets = numpy.full(size, 0.01)
    targets[index] = 0.99
    return targets


def accuracy(scorecard):
    """Return the fraction of 1 entries in *scorecard* (0.0 when empty)."""
    return sum(scorecard) / len(scorecard) if scorecard else 0.0


def main():
    """Train on the MNIST training CSV, then report accuracy on the test CSV."""
    n = NeuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)

    # "web/mnist_dataset/mnist_train_100.csv" is a small subset for quick runs.
    training_data_list = load_records("web/mnist_dataset/mnist_train.csv")

    # One epoch is one full pass over the training data.
    epochs = 1
    start = int(time.time())
    for _ in range(epochs):
        for record in training_data_list:
            label, inputs = parse_record(record)
            n.train(inputs, build_targets(int(label), output_nodes))
    end = int(time.time())
    print("训练用时=", end - start, "秒")

    # "web/mnist_dataset/mnist_test_10.csv" is a small subset for quick runs.
    test_data_list = load_records("web/mnist_dataset/mnist_test.csv")

    # 1 for every correctly classified record, 0 otherwise.
    scorecard = []
    start = int(time.time())
    for record in test_data_list:
        correct_label, inputs = parse_record(record)
        label = numpy.argmax(n.query(inputs))
        scorecard.append(1 if label == int(correct_label) else 0)
    end = int(time.time())
    print("检查用时=", end - start, "秒")

    print("正确率=", accuracy(scorecard))


if __name__ == "__main__":
    main()
验证码的数字和字母识别:
"""Recognise single captcha characters (digits and lowercase letters)."""
import string
import time
from random import shuffle

import numpy
import scipy.ndimage
import scipy.special


class NeuralNetwork:
    """Fully connected network with one hidden layer and sigmoid activations.

    Attributes set by ``__init__``: ``inodes``/``hnodes``/``onodes`` (layer
    sizes), ``lr`` (learning rate), ``wih`` (input->hidden weights, shape
    ``(hnodes, inodes)``), ``who`` (hidden->output weights, shape
    ``(onodes, hnodes)``), ``activation_function`` (logistic sigmoid) and
    ``inverse_activation_function`` (logit).
    """

    def __init__(self, inputnodes, hiddenodes, outputnodes, learningrate):
        """Store the layer sizes and learning rate and draw random weights."""
        self.inodes = inputnodes
        self.hnodes = hiddenodes
        self.onodes = outputnodes
        self.lr = learningrate

        # Weights are sampled from a zero-centred normal distribution whose
        # standard deviation is (number of nodes in the receiving layer)^-0.5.
        self.wih = numpy.random.normal(
            0.0, pow(self.hnodes, -0.5), (self.hnodes, self.inodes))
        self.who = numpy.random.normal(
            0.0, pow(self.onodes, -0.5), (self.onodes, self.hnodes))

        # Logistic sigmoid and its inverse.
        self.activation_function = scipy.special.expit
        self.inverse_activation_function = scipy.special.logit

    def _forward(self, inputs):
        """Return ``(hidden_outputs, final_outputs)`` for a column of inputs."""
        hidden_outputs = self.activation_function(numpy.dot(self.wih, inputs))
        final_outputs = self.activation_function(
            numpy.dot(self.who, hidden_outputs))
        return hidden_outputs, final_outputs

    @staticmethod
    def _rescale(signal):
        """Map *signal* linearly onto 0.01..0.99 so the logit stays finite.

        A completely flat signal has no range to stretch and is mapped to
        0.01 everywhere instead of dividing by zero.
        """
        signal = signal - numpy.min(signal)
        peak = numpy.max(signal)
        if peak > 0:
            signal = signal / peak
        return signal * 0.98 + 0.01

    def train(self, inputs_list, targets_list):
        """Run one gradient step on a single (inputs, targets) sample.

        Both arguments are flat sequences; they are turned into column
        vectors before use. The weights are updated in place.
        """
        inputs = numpy.array(inputs_list, ndmin=2).T
        targets = numpy.array(targets_list, ndmin=2).T
        hidden_outputs, final_outputs = self._forward(inputs)

        output_errors = targets - final_outputs
        # The hidden-layer error must be derived from the weights as they
        # were during the forward pass, i.e. before ``who`` is updated.
        hidden_errors = numpy.dot(self.who.T, output_errors)

        self.who += self.lr * numpy.dot(
            output_errors * final_outputs * (1.0 - final_outputs),
            hidden_outputs.T)
        self.wih += self.lr * numpy.dot(
            hidden_errors * hidden_outputs * (1.0 - hidden_outputs),
            inputs.T)

    def query(self, inputs_list):
        """Return the output-layer activations as an ``(onodes, 1)`` array."""
        inputs = numpy.array(inputs_list, ndmin=2).T
        return self._forward(inputs)[1]

    def back_query(self, targets_list):
        """Propagate an output pattern backwards to an input-layer pattern.

        *targets_list* holds the desired output activations (strictly between
        0 and 1). Returns an ``(inodes, 1)`` array scaled to 0.01..0.99.
        """
        final_outputs = numpy.array(targets_list, ndmin=2).T

        # Signal into the output layer, then out of the hidden layer.
        final_inputs = self.inverse_activation_function(final_outputs)
        hidden_outputs = self._rescale(numpy.dot(self.who.T, final_inputs))

        # Signal into the hidden layer, then out of the input layer.
        hidden_inputs = self.inverse_activation_function(hidden_outputs)
        return self._rescale(numpy.dot(self.wih.T, hidden_inputs))


# Captcha alphabet: the ten digits followed by the 26 lowercase letters.
codes = list(string.digits + string.ascii_lowercase)
# Each single-character tile is cut as 100 rows by 45 columns (see
# split_image in the data-preparation script) and stored flattened.
image_shape = (100, 45)
# Node counts of the input, hidden and output layers.
input_nodes = 45 * 100
# One output node per character of the alphabet.
output_nodes = len(codes)
hidden_nodes = output_nodes * 10

# Learning rate.
learning_rate = 0.2


def load_records(path):
    """Return the non-blank lines of the CSV file at *path*."""
    with open(path) as data_file:
        return [line for line in data_file if line.strip()]


def parse_record(record):
    """Split one CSV line into ``(label, inputs)``.

    The first field is returned unchanged as the label string; the remaining
    fields are pixel values that are scaled from 0..255 into 0.01..1.0.
    """
    label, *pixels = record.strip().split(",")
    # numpy.asfarray was removed in NumPy 2.0; asarray with an explicit
    # float dtype performs the same conversion.
    inputs = numpy.asarray(pixels, dtype=float) / 255.0 * 0.99 + 0.01
    return label, inputs


def build_targets(index, size):
    """Return a length-*size* target vector: 0.99 at *index*, 0.01 elsewhere."""
    targets = numpy.full(size, 0.01)
    targets[index] = 0.99
    return targets


def rotated_variants(inputs, angle=10):
    """Return *inputs* rotated by +angle and -angle degrees, both flattened.

    The flat pixel vector is viewed as an ``image_shape`` picture before
    rotating; rotating it as a single 1 x N row would push almost every pixel
    outside the frame and leave only the fill value. Uncovered corners are
    filled with 0.01, the scaled value of a blank pixel.
    """
    image = numpy.asarray(inputs).reshape(image_shape)
    return [
        scipy.ndimage.rotate(image, signed_angle, cval=0.01,
                             reshape=False).reshape(-1)
        for signed_angle in (angle, -angle)
    ]


def accuracy(scorecard):
    """Return the fraction of 1 entries in *scorecard* (0.0 when empty)."""
    return sum(scorecard) / len(scorecard) if scorecard else 0.0


def main():
    """Train on web/train.csv, score web/test.csv and plot a back-query."""
    # Imported here so a missing plotting stack fails before training starts
    # and so the network class can be imported without matplotlib.
    import matplotlib.pyplot

    n = NeuralNetwork(input_nodes, hidden_nodes, output_nodes, learning_rate)

    training_data_list = load_records("web/train.csv")
    shuffle(training_data_list)

    # One epoch is one full pass over the training data.
    epochs = 1
    print("输入节点=%d,隐藏节点=%d,输出节点=%d,学习因子=%f,时代=%d"
          % (input_nodes, hidden_nodes, output_nodes, learning_rate, epochs))
    print("开始训练...")
    start = int(time.time())
    for _ in range(epochs):
        for record in training_data_list:
            code, train_inputs = parse_record(record)
            # codes.index raises ValueError for a label outside the alphabet.
            train_targets = build_targets(codes.index(code), output_nodes)
            n.train(train_inputs, train_targets)
            # Augment with the same tile rotated by +10 and -10 degrees.
            for rotated_inputs in rotated_variants(train_inputs):
                n.train(rotated_inputs, train_targets)
    end = int(time.time())
    print("训练用时=", end - start, "秒")

    test_data_list = load_records("web/test.csv")

    # 1 for every correctly classified record, 0 otherwise.
    scorecard = []
    print("开始测试...")
    start = int(time.time())
    for record in test_data_list:
        correct_label, verify_inputs = parse_record(record)
        label = codes[numpy.argmax(n.query(verify_inputs))]
        scorecard.append(1 if label == correct_label else 0)
    end = int(time.time())
    print("检查用时=", end - start, "秒")

    print("正确率=", accuracy(scorecard))

    # Back-query: render what the network "imagines" for the first code.
    label = 0
    targets = build_targets(label, output_nodes)
    image_data = n.back_query(targets)
    matplotlib.pyplot.imshow(image_data.reshape(image_shape), cmap='Greys',
                             interpolation='None')
    # Without show() nothing is displayed when run as a plain script.
    matplotlib.pyplot.show()


if __name__ == "__main__":
    main()
制作训练和测试数据:
"""Build the training and test data: split captcha images, export them as CSV."""
import csv
import os
import string

import numpy
import scipy.special


class NeuralNetwork:
    """Fully connected network with one hidden layer and sigmoid activations.

    The conversion helpers further down do not reference this class.

    Attributes set by ``__init__``: ``inodes``/``hnodes``/``onodes`` (layer
    sizes), ``lr`` (learning rate), ``wih`` (input->hidden weights, shape
    ``(hnodes, inodes)``), ``who`` (hidden->output weights, shape
    ``(onodes, hnodes)``) and ``activation_function`` (the logistic sigmoid).
    """

    def __init__(self, inputnodes, hiddenodes, outputnodes, learningrate):
        """Store the layer sizes and learning rate and draw random weights."""
        self.inodes = inputnodes
        self.hnodes = hiddenodes
        self.onodes = outputnodes
        self.lr = learningrate

        # Weights are sampled from a zero-centred normal distribution whose
        # standard deviation is (number of nodes in the receiving layer)^-0.5.
        self.wih = numpy.random.normal(
            0.0, pow(self.hnodes, -0.5), (self.hnodes, self.inodes))
        self.who = numpy.random.normal(
            0.0, pow(self.onodes, -0.5), (self.onodes, self.hnodes))

        # Logistic sigmoid.
        self.activation_function = scipy.special.expit

    def _forward(self, inputs):
        """Return ``(hidden_outputs, final_outputs)`` for a column of inputs."""
        hidden_outputs = self.activation_function(numpy.dot(self.wih, inputs))
        final_outputs = self.activation_function(
            numpy.dot(self.who, hidden_outputs))
        return hidden_outputs, final_outputs

    def train(self, inputs_list, targets_list):
        """Run one gradient step on a single (inputs, targets) sample.

        Both arguments are flat sequences; they are turned into column
        vectors before use. The weights are updated in place.
        """
        inputs = numpy.array(inputs_list, ndmin=2).T
        targets = numpy.array(targets_list, ndmin=2).T
        hidden_outputs, final_outputs = self._forward(inputs)

        output_errors = targets - final_outputs
        # The hidden-layer error must be derived from the weights as they
        # were during the forward pass, i.e. before ``who`` is updated.
        hidden_errors = numpy.dot(self.who.T, output_errors)

        self.who += self.lr * numpy.dot(
            output_errors * final_outputs * (1.0 - final_outputs),
            hidden_outputs.T)
        self.wih += self.lr * numpy.dot(
            hidden_errors * hidden_outputs * (1.0 - hidden_outputs),
            inputs.T)

    def query(self, inputs_list):
        """Return the output-layer activations as an ``(onodes, 1)`` array."""
        inputs = numpy.array(inputs_list, ndmin=2).T
        return self._forward(inputs)[1]


def list_image_files(img_dir):
    """Return the sorted names of the entries in *img_dir* that are not directories.

    The directory test is made on the joined path; testing the bare name
    would check it against the current working directory instead.
    """
    return sorted(
        name for name in os.listdir(img_dir)
        if not os.path.isdir(os.path.join(img_dir, name))
    )


def captcha_labels(img_name):
    """Return the four characters just before the file extension, or None.

    For a name such as ``"ab3d.jpg"`` this is ``"ab3d"``. ``None`` is returned
    when the name (without extension) is shorter than four characters.
    """
    stem = os.path.splitext(img_name)[0]
    return stem[-4:] if len(stem) >= 4 else None


def convert_img_to_csv(img_dir, csv_file):
    """Write every image in *img_dir* as one row of ``web/<csv_file>.csv``.

    Each row is the first character of the file name (the label) followed by
    the inverted grayscale pixels (``255.0 - value``) in row-major order.
    Files that OpenCV cannot decode are reported and skipped.
    """
    # Imported lazily so the pure helpers in this module stay importable
    # on machines without OpenCV.
    import cv2

    with open(r"web/" + csv_file + ".csv", "w", newline="") as f:
        writer = csv.writer(f)
        for img_name in list_image_files(img_dir):
            img_path = os.path.join(img_dir, img_name)
            # The captcha tiles are black and white, so read them as grayscale.
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                print("skipping unreadable image:", img_path)
                continue
            row_data = [img_name[0]]
            row_data.extend((255.0 - img.flatten()).tolist())
            writer.writerow(row_data)


def split_image(img_dir, save_path):
    """Cut every captcha image in *img_dir* into four 45-pixel-wide tiles.

    Tile ``i`` covers rows 0..99 and columns ``45*i .. 45*i+44`` and is saved
    as ``<save_path><char>_<seq>.jpg``, where ``<char>`` is the i-th of the
    four characters preceding the file extension and ``<seq>`` is a running
    counter. *save_path* is used as a raw prefix, so it must end with a path
    separator to name a directory. Unreadable or too-short-named files are
    reported and skipped.
    """
    # Imported lazily so the pure helpers in this module stay importable
    # on machines without OpenCV.
    import cv2

    image_save_path_tail = ".jpg"
    seq = 1
    for img_name in list_image_files(img_dir):
        img_path = os.path.join(img_dir, img_name)
        labels = captcha_labels(img_name)
        if labels is None:
            print("skipping image without a 4-character code:", img_path)
            continue
        # The captcha images are black and white, so read them as grayscale.
        src_img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if src_img is None:
            # cv2.imread signals an unreadable file by returning None.
            print("skipping unreadable image:", img_path)
            continue
        for i, label in enumerate(labels):
            img_roi = src_img[0:100, (i * 45):((i + 1) * 45)]
            image_save_path = "%s%s%s%d%s" % (
                save_path, label, "_", seq, image_save_path_tail)
            if not cv2.imwrite(image_save_path, img_roi):
                print("could not write image:", image_save_path)
            seq = seq + 1


if __name__ == "__main__":
    # Training set (run once with these paths instead):
    #   split_image("web/train/", "web/train-single/")
    #   convert_img_to_csv("web/train-single", "train")
    # Test set: cut each captcha into four tiles, then export them as CSV.
    split_image("web/unknownCode/", "web/test-single/")
    convert_img_to_csv("web/test-single", "test")