1. A traditional neural network
Suppose the input is a 1000*1000-pixel image: the input layer v0 then has 1M nodes. If the hidden layer v1 also has 1M neurons, the weights between those two layers alone number 1M*1M = 10^12! With that many weights the computation becomes enormous, and the number of training samples required grows with it. This is where the convolutional neural network comes in.
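To see the gap concretely, here is a minimal back-of-the-envelope sketch comparing that fully connected layer with a convolutional layer like the 5*5, 32-kernel layer used in the reference code below (the convolutional figures are illustrative, relying on the fact that convolutional weights are shared across the whole image):

import numpy as np

# Fully connected: every input pixel connects to every hidden neuron.
fc_params = 1_000_000 * 1_000_000        # 10^12 weights
# Convolutional: 32 kernels of size 5*5 on a 1-channel input, plus one bias each.
conv_params = 5 * 5 * 1 * 32 + 32        # 832 parameters
print(fc_params // conv_params)          # roughly 1.2 * 10^9 times fewer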
2. The layer structure of a convolutional neural network
As the figure shows, a convolutional neural network is built from convolutional layers (CONV), pooling layers (POOL), activation layers (RELU), and fully connected layers (FC), plus a data input layer.
3. Convolution
Example 1:
The input image is 5*5. Convolution multiplies the corresponding elements of two matrices and sums the results (an inner product): 1*1+1*0+1*1+1*1+1*1=4, which gives one feature value; sliding the window by the stride (1 in the figure above) produces the remaining values.
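To make the multiply-and-sum concrete, here is a minimal NumPy sketch of the sliding window (the 5*5 input and 3*3 kernel values are assumed from the classic example the figure is based on):

import numpy as np

def conv2d_valid(image, kernel, stride=1):
    # Slide the kernel over the image, taking the inner product at each position.
    kh, kw = kernel.shape
    oh = (image.shape[0] - kh) // stride + 1
    ow = (image.shape[1] - kw) // stride + 1
    out = np.zeros((oh, ow))
    for i in range(oh):
        for j in range(ow):
            patch = image[i*stride:i*stride+kh, j*stride:j*stride+kw]
            out[i, j] = np.sum(patch * kernel)   # multiply elementwise, then add
    return out

image = np.array([[1,1,1,0,0],
                  [0,1,1,1,0],
                  [0,0,1,1,1],
                  [0,0,1,1,0],
                  [0,1,1,0,0]])
kernel = np.array([[1,0,1],
                   [0,1,0],
                   [1,0,1]])
print(conv2d_valid(image, kernel))   # top-left feature value is 4; output is 3*3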
Example 2:
The '3' in the figure above is the depth: the image's three RGB channels; the image itself is 32*32 pixels.
Each small circle is a neuron (filter); every neuron has its own weight matrix, which is convolved with the input data (multiply and sum).
The weight matrix can be viewed as a window (receptive field) that slides across the input by the stride, computing one value at each position.
Example 3:
The input is 7*7*3 (depth = 3, one slice each for R, G, B). There are two neurons (W0, W1), each likewise with 3 slices. Each slice of W0 is convolved with the corresponding input slice; the three results are summed and the bias b0 is added. The window moves with stride 2 (pad = 1: a ring of zeros is added around the input), yielding Output0; W1 produces Output1 the same way, so the result has depth 2. The benefit is that the number of weights is greatly reduced.
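The output size follows from the usual formula: (7 + 2*pad - 3)/stride + 1 = (7 + 2 - 3)/2 + 1 = 3, so each neuron yields a 3*3 output slice. Below is a minimal sketch of the per-neuron computation; random integers and the biases b=1, b=0 stand in for the figure's actual values:

import numpy as np

def conv3ch(x, w, b, stride=2, pad=1):
    # x: H*W*3 input; w: 3*3*3 filter. Sum the three per-channel convolutions, add the bias.
    x = np.pad(x, ((pad, pad), (pad, pad), (0, 0)))   # pad=1: ring of zeros around the input
    k = w.shape[0]
    n = (x.shape[0] - k) // stride + 1                # (7 + 2*1 - 3)//2 + 1 = 3
    out = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            patch = x[i*stride:i*stride+k, j*stride:j*stride+k, :]
            out[i, j] = np.sum(patch * w) + b         # sum over all 3 channels, plus bias
    return out

rng = np.random.default_rng(0)
x = rng.integers(0, 3, size=(7, 7, 3))
w0, w1 = rng.integers(-1, 2, size=(2, 3, 3, 3))
print(conv3ch(x, w0, b=1).shape, conv3ch(x, w1, b=0).shape)   # two 3*3 slices -> output depth 2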
Example 4:
A neuron is effectively a filter: it attends to one particular feature of the image and extracts it.
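For instance (an illustrative hand-crafted kernel, not taken from the original figures), a filter whose weights form a bright-to-dark pattern responds strongly only near vertical edges. This reuses the conv2d_valid helper from the Example 1 sketch:

import numpy as np

# A vertical-edge filter: reacts where brightness changes from left to right.
edge_kernel = np.array([[1, 0, -1],
                        [1, 0, -1],
                        [1, 0, -1]])

img = np.zeros((5, 5))
img[:, :2] = 1                            # bright left half, dark right half
print(conv2d_valid(img, edge_kernel))     # strong response (3) where the window straddles the edge, 0 elsewhere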
4. Pooling Layer
The main purpose of the pooling layer is to compress the data (downsample the feature maps) and reduce overfitting.
In the figure, the feature map shrinks from the original 224*224 to 112*112.
Max pooling keeps the maximum value in each window; mean pooling takes the average.
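A minimal NumPy sketch of 2*2 max pooling with stride 2 (mean pooling would simply swap max for mean):

import numpy as np

def max_pool_2x2(x):
    # Keep the maximum of each non-overlapping 2*2 window, halving height and width.
    h, w = x.shape[0] // 2 * 2, x.shape[1] // 2 * 2   # drop odd edge rows/columns
    return x[:h, :w].reshape(h // 2, 2, w // 2, 2).max(axis=(1, 3))

x = np.array([[1, 3, 2, 4],
              [5, 7, 6, 8],
              [9, 2, 1, 0],
              [3, 4, 5, 6]])
print(max_pool_2x2(x))   # [[7 8]
                         #  [9 6]]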
For more detail, see:
https://www.cnblogs.com/skyfsm/p/6790245.html
https://www.cnblogs.com/fydeblog/p/7450413.html
5. Reference Code
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Size of each batch
batch_size = 100
# Total number of batches
n_batch = mnist.train.num_examples // batch_size

# Summaries for a variable
def variable_summaries(var):
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)               # mean
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)           # standard deviation
        tf.summary.scalar('max', tf.reduce_max(var))  # maximum
        tf.summary.scalar('min', tf.reduce_min(var))  # minimum
        tf.summary.histogram('histogram', var)        # histogram

# Weight initialization
def weight_variable(shape, name):
    initial = tf.truncated_normal(shape, stddev=0.1)  # truncated normal distribution
    return tf.Variable(initial, name=name)

# Bias initialization
def bias_variable(shape, name):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial, name=name)

# Convolutional layer
def conv2d(x, W):
    # x: input tensor of shape [batch, in_height, in_width, in_channels]
    # W: filter / kernel tensor of shape [filter_height, filter_width, in_channels, out_channels]
    # strides[0] = strides[3] = 1; strides[1] is the stride along the height, strides[2] along the width
    # padding: a string, either "SAME" or "VALID"
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

# Pooling layer
def max_pool_2x2(x):
    # ksize = [1, x, y, 1]
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Name scopes
with tf.name_scope('input'):
    # Two placeholders
    x = tf.placeholder(tf.float32, [None, 784], name='x-input')
    y = tf.placeholder(tf.float32, [None, 10], name='y-input')
    with tf.name_scope('x_image'):
        # Reshape x into a 4-D tensor [batch, in_height, in_width, in_channels]
        x_image = tf.reshape(x, [-1, 28, 28, 1], name='x_image')


with tf.name_scope('Conv1'):
    # Weights and biases of the first convolutional layer
    with tf.name_scope('W_conv1'):
        W_conv1 = weight_variable([5, 5, 1, 32], name='W_conv1')  # 5*5 window, 32 kernels extracting features from 1 plane
    with tf.name_scope('b_conv1'):
        b_conv1 = bias_variable([32], name='b_conv1')             # one bias per kernel

    # Convolve x_image with the weights, add the bias, then apply the ReLU activation
    with tf.name_scope('conv2d_1'):
        conv2d_1 = conv2d(x_image, W_conv1) + b_conv1
    with tf.name_scope('relu'):
        h_conv1 = tf.nn.relu(conv2d_1)
    with tf.name_scope('h_pool1'):
        h_pool1 = max_pool_2x2(h_conv1)                           # max-pooling

with tf.name_scope('Conv2'):
    # Weights and biases of the second convolutional layer
    with tf.name_scope('W_conv2'):
        W_conv2 = weight_variable([5, 5, 32, 64], name='W_conv2')  # 5*5 window, 64 kernels extracting features from 32 planes
    with tf.name_scope('b_conv2'):
        b_conv2 = bias_variable([64], name='b_conv2')              # one bias per kernel

    # Convolve h_pool1 with the weights, add the bias, then apply the ReLU activation
    with tf.name_scope('conv2d_2'):
        conv2d_2 = conv2d(h_pool1, W_conv2) + b_conv2
    with tf.name_scope('relu'):
        h_conv2 = tf.nn.relu(conv2d_2)
    with tf.name_scope('h_pool2'):
        h_pool2 = max_pool_2x2(h_conv2)                            # max-pooling

# A 28*28 image is still 28*28 after the first convolution (SAME padding), 14*14 after the first pooling;
# 14*14 after the second convolution, then 7*7 after the second pooling.
# The layers above therefore produce 64 planes of size 7*7.

with tf.name_scope('fc1'):
    # Weights of the first fully connected layer
    with tf.name_scope('W_fc1'):
        W_fc1 = weight_variable([7*7*64, 1024], name='W_fc1')  # previous layer has 7*7*64 neurons, this FC layer has 1024
    with tf.name_scope('b_fc1'):
        b_fc1 = bias_variable([1024], name='b_fc1')             # 1024 nodes

    # Flatten the output of pooling layer 2 to 1-D
    with tf.name_scope('h_pool2_flat'):
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64], name='h_pool2_flat')
    # Output of the first fully connected layer
    with tf.name_scope('wx_plus_b1'):
        wx_plus_b1 = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
    with tf.name_scope('relu'):
        h_fc1 = tf.nn.relu(wx_plus_b1)

    # keep_prob is the probability that a neuron's output is kept (dropout)
    with tf.name_scope('keep_prob'):
        keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    with tf.name_scope('h_fc1_drop'):
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob, name='h_fc1_drop')

with tf.name_scope('fc2'):
    # Second fully connected layer
    with tf.name_scope('W_fc2'):
        W_fc2 = weight_variable([1024, 10], name='W_fc2')
    with tf.name_scope('b_fc2'):
        b_fc2 = bias_variable([10], name='b_fc2')
    with tf.name_scope('wx_plus_b2'):
        wx_plus_b2 = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    with tf.name_scope('softmax'):
        # Compute the output
        prediction = tf.nn.softmax(wx_plus_b2)

# Cross-entropy cost function
with tf.name_scope('cross_entropy'):
    # softmax_cross_entropy_with_logits expects the raw logits (wx_plus_b2);
    # feeding it the softmaxed prediction would apply softmax twice
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=wx_plus_b2), name='cross_entropy')
    tf.summary.scalar('cross_entropy', cross_entropy)

# Optimize with AdamOptimizer
with tf.name_scope('train'):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Accuracy
with tf.name_scope('accuracy'):
    with tf.name_scope('correct_prediction'):
        # Store the results in a boolean list
        correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))  # argmax returns the index of the largest value in a 1-D tensor
    with tf.name_scope('accuracy'):
        # Compute the accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

# Merge all summaries
merged = tf.summary.merge_all()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_writer = tf.summary.FileWriter('logs/train', sess.graph)
    test_writer = tf.summary.FileWriter('logs/test', sess.graph)
    for i in range(1001):
        # Train the model
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        sess.run(train_step, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 0.5})
        # Record summaries on the training set
        summary = sess.run(merged, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.0})
        train_writer.add_summary(summary, i)
        # Record summaries on the test set
        batch_xs, batch_ys = mnist.test.next_batch(batch_size)
        summary = sess.run(merged, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.0})
        test_writer.add_summary(summary, i)

        if i % 100 == 0:
            test_acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels, keep_prob: 1.0})
            train_acc = sess.run(accuracy, feed_dict={x: mnist.train.images[:10000], y: mnist.train.labels[:10000], keep_prob: 1.0})
            print("Iter " + str(i) + ", Testing Accuracy= " + str(test_acc) + ", Training Accuracy= " + str(train_acc))
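The summaries written to logs/train and logs/test can be inspected with TensorBoard (for example, tensorboard --logdir=logs). Note that the script uses the TensorFlow 1.x API (placeholders, sessions, and the tensorflow.examples.tutorials MNIST loader), so it will not run unchanged under TensorFlow 2.x.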