参考文章:
VGGnet——从TFrecords制作到网络训练
Tensorflow制作并用CNN训练自己的数据集
TensorFlow中的协调器tf.train.Coordinator和入队线程启动器tf.train.start_queue_runners
这篇文章是在《TensorFlow制作自己的数据集,并用神经网络来训练自己制作的数据集【上】》的基础上写的,所以在看这篇文章之前建议先看一下【上】这一部分。
在制作好我们的数据集之后,就可以将其用于模型训练了,我是用DenseNet网络来跑自己的数据集的。这个网络层数比较多,比较复杂,但是对于理解如何用自己的网络来训练自己制作的数据集还是可以的,下面直接上代码:
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
import tensorflow as tf
import os
from tflearn.layers.conv import global_avg_pool
from tensorflow.contrib.layers import batch_norm, flatten
from tensorflow.contrib.framework import arg_scope
import numpy as np
from dataset import get_batch_record
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
#%% Parameter settings
# Hyper-parameters
growth_k = 12 # growth rate k; the original note says 12 or 24 are the usual choices
nb_block = 2 # how many (dense block + Transition Layer) pairs; this model uses two such pairs followed by a final dense block
init_learning_rate = 1e-4 #0.0001
epsilon = 1e-8 # AdamOptimizer epsilon
dropout_rate = 0.2
# Only the Momentum optimizer variant uses the next two values
nesterov_momentum = 0.9
weight_decay = 1e-4
# Label & batch_size
class_num = 19 # number of classes; the original note said "ten-class problem", but the label placeholder and one_hot calls in this file use 19
batch_size = 64
total_epochs = 300
# Original note: "fixes an error". Presumably this avoids duplicate-variable
# errors when the script is re-run in the same interpreter -- TODO confirm.
tf.reset_default_graph()
#%%网络层的定义
def one_hot(labels, Label_class):
    """Convert integer class labels into a one-hot matrix.

    Args:
        labels: Sequence or array of class indices (values that can be cast
            to int). The input is flattened to one dimension.
        Label_class: Total number of classes, i.e. the width of each row.

    Returns:
        Integer ``numpy.ndarray`` of shape ``(len(labels), Label_class)``.
        A row is all zeros when its label lies outside ``[0, Label_class)``.
        An empty input yields shape ``(0, Label_class)``.
    """
    # astype(int) truncates toward zero, like the per-element int() cast did.
    indices = np.asarray(labels).astype(int).reshape(-1)
    # One broadcast comparison replaces the nested Python loops and keeps the
    # second dimension even when the batch is empty.
    return (indices[:, None] == np.arange(Label_class)).astype(int)
def conv_layer(input, filter, kernel, stride=1, layer_name="conv"):
    """Apply a 2-D convolution with 'SAME' padding inside a name scope.

    Args:
        input: Tensor passed to the convolution as ``inputs``.
        filter: Number of output filters.
        kernel: Kernel size, forwarded as ``kernel_size``.
        stride: Stride, forwarded as ``strides`` (default 1).
        layer_name: Name of the enclosing ``tf.name_scope``.

    Returns:
        The output tensor of ``tf.layers.conv2d``.
    """
    conv_kwargs = dict(
        inputs=input,
        filters=filter,
        kernel_size=kernel,
        strides=stride,
        padding='SAME',
    )
    with tf.name_scope(layer_name):
        return tf.layers.conv2d(**conv_kwargs)
def Global_Average_Pooling(x, stride=1):
    """Global average pooling, delegated to tflearn's ``global_avg_pool``.

    ``stride`` is accepted but not used by this implementation.

    A tflearn-free alternative noted by the original author:

        width = np.shape(x)[1]
        height = np.shape(x)[2]
        pool_size = [width, height]
        return tf.layers.average_pooling2d(inputs=x, pool_size=pool_size, strides=stride)
        # The stride value does not matter
    """
    # Original note: tflearn may additionally need h5py and curses installed.
    pooled = global_avg_pool(x, name='Global_avg_pooling')
    return pooled
def Batch_Normalization(x, training, scope):
    """Batch-normalise ``x``, selecting the branch with the tensor ``training``.

    Args:
        x: Input tensor.
        training: Predicate handed to ``tf.cond`` and also forwarded to
            ``batch_norm`` as ``is_training``.
        scope: Scope name applied to ``batch_norm`` through ``arg_scope``.

    Returns:
        The tensor produced by ``tf.cond``: the first branch calls
        ``batch_norm`` with ``reuse=None``, the second with ``reuse=True``.
    """
    bn_defaults = dict(
        scope=scope,
        updates_collections=None,
        decay=0.9,
        center=True,
        scale=True,
        zero_debias_moving_mean=True,
    )
    with arg_scope([batch_norm], **bn_defaults):
        def _when_training():
            return batch_norm(inputs=x, is_training=training, reuse=None)

        def _when_inferring():
            return batch_norm(inputs=x, is_training=training, reuse=True)

        return tf.cond(training, _when_training, _when_inferring)
def Drop_out(x, rate, training):
    """Dropout layer: a thin wrapper around ``tf.layers.dropout``.

    Args:
        x: Input tensor.
        rate: Dropout rate forwarded unchanged.
        training: Training flag forwarded unchanged.
    """
    dropped = tf.layers.dropout(inputs=x, rate=rate, training=training)
    return dropped
def Relu(x):
    """ReLU activation: returns ``tf.nn.relu(x)``."""
    activated = tf.nn.relu(x)
    return activated
def Average_pooling(x, pool_size=[2,2], stride=2, padding='VALID'):
    """2-D average pooling; defaults are a 2x2 window, stride 2, 'VALID' padding.

    All arguments are forwarded unchanged to ``tf.layers.average_pooling2d``.
    """
    pool_kwargs = dict(inputs=x, pool_size=pool_size, strides=stride, padding=padding)
    return tf.layers.average_pooling2d(**pool_kwargs)
def Max_Pooling(x, pool_size=[3,3], stride=2, padding='VALID'):
    """2-D max pooling; defaults are a 3x3 window, stride 2, 'VALID' padding.

    All arguments are forwarded unchanged to ``tf.layers.max_pooling2d``.
    """
    pool_kwargs = dict(inputs=x, pool_size=pool_size, strides=stride, padding=padding)
    return tf.layers.max_pooling2d(**pool_kwargs)
def Concatenation(layers):
    """Concatenate a list of tensors along axis 3.

    How ``tf.concat`` treats the axis argument, shown on 2-D inputs:

        T1 = [[1, 2, 3], [4, 5, 6]]
        T2 = [[7, 8, 9], [10, 11, 12]]
        tf.concat([T1, T2], 0) ==> [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
        tf.concat([T1, T2], 1) ==> [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]]
    """
    joined = tf.concat(layers, axis=3)
    return joined
def Linear(x):
    """Final dense layer named 'linear' with ``class_num`` output units."""
    logits_out = tf.layers.dense(inputs=x, units=class_num, name='linear')
    return logits_out
#%%DenseNet网络类的定义
class DenseNet():
    """DenseNet classification graph built from dense blocks and transition layers.

    The logits tensor is built in the constructor and exposed as ``self.model``.

    Args:
        x: Input image tensor. Feature maps are concatenated on axis 3, so a
            4-D channels-last layout is assumed.
        nb_blocks: Number of (dense block + transition layer) pairs placed
            before the final dense block.
        filters: Growth rate k, the number of feature maps each bottleneck
            layer produces.
        training: Boolean tensor forwarded to batch normalisation and dropout.
    """

    def __init__(self, x, nb_blocks, filters, training):
        self.nb_blocks = nb_blocks
        self.filters = filters
        self.training = training
        self.model = self.Dense_net(x)

    def bottleneck_layer(self, x, scope):
        """Bottleneck unit: two rounds of BN -> ReLU -> conv -> dropout.

        The 1x1 convolution produces 4k feature maps and the 3x3 convolution
        produces k, where k is ``self.filters``.
        """
        with tf.name_scope(scope):
            x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
            x = Relu(x)
            x = conv_layer(x, filter=4 * self.filters, kernel=[1,1], layer_name=scope+'_conv1')
            x = Drop_out(x, rate=dropout_rate, training=self.training)

            x = Batch_Normalization(x, training=self.training, scope=scope+'_batch2')
            x = Relu(x)
            x = conv_layer(x, filter=self.filters, kernel=[3,3], layer_name=scope+'_conv2')
            x = Drop_out(x, rate=dropout_rate, training=self.training)
            return x

    def transition_layer(self, x, scope):
        """Transition unit: BN -> ReLU -> 1x1 conv -> dropout -> 2x2 average pooling.

        The 1x1 convolution halves the channel count (compression factor 0.5).
        """
        with tf.name_scope(scope):
            x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
            x = Relu(x)
            # See https://github.com/taki0112/Densenet-Tensorflow/issues/10
            in_channel = int(x.shape[-1])
            # The filter count must be an integer; the original passed the
            # float product directly. Truncate so odd channel counts work too.
            out_channel = int(in_channel * 0.5)
            x = conv_layer(x, filter=out_channel, kernel=[1,1], layer_name=scope+'_conv1')
            x = Drop_out(x, rate=dropout_rate, training=self.training)
            x = Average_pooling(x, pool_size=[2,2], stride=2)
            return x

    def dense_block(self, input_x, nb_layers, layer_name):
        """Dense block in which every bottleneck sees all earlier feature maps.

        Args:
            input_x: Input tensor of the block.
            nb_layers: Number of bottleneck layers in the block.
            layer_name: Name scope and prefix for the bottleneck scopes.

        Returns:
            Concatenation of the block input and every bottleneck output.
        """
        with tf.name_scope(layer_name):
            # Collects the block input followed by each bottleneck output.
            layers_concat = [input_x]

            x = self.bottleneck_layer(input_x, scope=layer_name + '_bottleN_' + str(0))
            layers_concat.append(x)

            for i in range(nb_layers - 1):
                # Each further bottleneck consumes everything produced so far.
                x = Concatenation(layers_concat)
                x = self.bottleneck_layer(x, scope=layer_name + '_bottleN_' + str(i + 1))
                layers_concat.append(x)

            x = Concatenation(layers_concat)
            return x

    def Dense_net(self, input_x):
        """Build the whole network and return the class logits."""
        # Stem: 7x7 stride-2 convolution with 2k filters, then 3x3 stride-2 max pooling.
        x = conv_layer(input_x, filter=2 * self.filters, kernel=[7,7], stride=2, layer_name='conv0')
        x = Max_Pooling(x, pool_size=[3,3], stride=2)

        for i in range(self.nb_blocks):
            # Each pair is a 4-layer dense block followed by a transition layer.
            x = self.dense_block(input_x=x, nb_layers=4, layer_name='dense_'+str(i))
            x = self.transition_layer(x, scope='trans_'+str(i))

        # Deeper configuration (6 -> 12 -> 48) kept from the original, disabled:
        #   x = self.dense_block(input_x=x, nb_layers=6, layer_name='dense_1')
        #   x = self.transition_layer(x, scope='trans_1')
        #   x = self.dense_block(input_x=x, nb_layers=12, layer_name='dense_2')
        #   x = self.transition_layer(x, scope='trans_2')
        #   x = self.dense_block(input_x=x, nb_layers=48, layer_name='dense_3')
        #   x = self.transition_layer(x, scope='trans_3')

        # Final dense block with 32 bottleneck layers.
        x = self.dense_block(input_x=x, nb_layers=32, layer_name='dense_final')

        # Classification head: BN -> ReLU -> global average pooling -> dense.
        x = Batch_Normalization(x, training=self.training, scope='linear_batch')
        x = Relu(x)
        x = Global_Average_Pooling(x)
        x = flatten(x)
        x = Linear(x)
        #x = tf.reshape(x, [-1, 19])
        return x
#%% Training and evaluation graph
# Input images: a fixed-size batch of 128x128 3-channel images.
x = tf.placeholder(tf.float32, [batch_size,128,128,3])
batch_images = tf.reshape(x, [-1, 128, 128, 3])
# One-hot labels; the width follows class_num instead of repeating the literal 19.
label = tf.placeholder(tf.float32, [batch_size, class_num])
training_flag = tf.placeholder(tf.bool)  # switches batch norm / dropout between training and inference
learning_rate = tf.placeholder(tf.float32, name='learning_rate')

# logits is the tensor built by DenseNet.Dense_net on the input batch.
logits = DenseNet(x=batch_images, nb_blocks=nb_block, filters=growth_k, training=training_flag).model
# Softmax cross-entropy between logits and labels. This op is deprecated in
# favour of tf.nn.softmax_cross_entropy_with_logits_v2.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits))

# Momentum variant used in the paper (disabled here; Adam is used instead):
#   l2_loss = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
#   optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=nesterov_momentum, use_nesterov=True)
#   train = optimizer.minimize(cost + l2_loss * weight_decay)
#   init_learning_rate = 0.1

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=epsilon)
train = optimizer.minimize(cost)  # one Adam update step on the training loss
# `test` used to be a second optimizer.minimize(cost), so fetching it during
# evaluation updated the weights on test batches. The name is kept for code
# that still fetches it, but it is now a no-op.
test = tf.no_op(name='test')

correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(label, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

#tf.summary.scalar('loss', cost)
#tf.summary.scalar('accuracy', accuracy)
#saver = tf.train.Saver(tf.global_variables())  # save the trained model / restore a saved one
# tf.global_variables() lists every global variable in the graph.
#%% Feed the TFRecords dataset into the network for training
# Paths of the TFRecords files produced by the dataset-creation step.
tfrecords_file0 = os.getcwd() + '//outputdata//train.tfrecords'
tfrecords_file1 = os.getcwd() + '//outputdata//test.tfrecords'
image_batch, label_batch = get_batch_record(tfrecords_file0,batch_size)
image_test_batch,label_test_batch = get_batch_record(tfrecords_file1,batch_size)

# Number of training samples (previously an inline literal).
train_sample_num = 5081
# Iterations per epoch: 5081 / 64 -> 79.
total_batch = int(train_sample_num / batch_size)
# Epochs at which the learning rate is divided by 10. They are truncated to
# int so the schedule still fires when total_epochs is not a multiple of 4;
# the original compared the int epoch against a float product.
lr_decay_epochs = (int(total_epochs * 0.5), int(total_epochs * 0.75))

with tf.Session() as sess:
    #ckpt = tf.train.get_checkpoint_state('./model')  # state of the checkpoint files under the given path
    #if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
    #    saver.restore(sess, ckpt.model_checkpoint_path)  # reload parameters to resume training or to test
    #else:
    sess.run(tf.global_variables_initializer())
    #merged = tf.summary.merge_all()  # collect all summaries for TensorBoard
    #writer = tf.summary.FileWriter('./logs', sess.graph)  # graph visualisation
    coord = tf.train.Coordinator()  # coordinates the queue-runner threads
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)  # start filling the input queues
    try:
        epoch_learning_rate = init_learning_rate
        for epoch in range(total_epochs):
            if epoch in lr_decay_epochs:
                epoch_learning_rate = epoch_learning_rate / 10  # lower the learning rate

            # ---- training ----
            for step in range(total_batch):
                image0, label0 = sess.run([image_batch, label_batch])
                label0 = one_hot(label0, class_num)
                train_feed_dict = {
                    x: image0,
                    label: label0,
                    learning_rate: epoch_learning_rate,
                    training_flag: True
                }
                # Fetch loss and accuracy in the same run as the update. A
                # separate run costs a second forward pass and uses a
                # different dropout mask than the reported loss.
                _, train_loss, train_accuracy = sess.run([train, cost, accuracy], feed_dict=train_feed_dict)
                print("Epoch:[%4d] [%4d/%4d],Training loss:[%.8f], Training accuracy:[%.8f]" % (epoch+1, step+1, total_batch, train_loss,train_accuracy) )

            # ---- evaluation on one test batch ----
            image1, label1 = sess.run([image_test_batch, label_test_batch])
            label1 = one_hot(label1, class_num)
            test_feed_dict = {
                x: image1,
                label: label1,
                training_flag: False
            }
            # Evaluation must not run an optimizer op: the original fetched
            # `test`, which applied a gradient step on the test batch.
            test_loss, test_accuracy = sess.run([cost, accuracy], feed_dict=test_feed_dict)
            print("Epoch:[%04d/%04d], Training accuracy:[%.8f], Training loss:[%.8f], Testing accuracy:[%.8f], Testing loss:[%.8f]" % (epoch + 1,total_epochs,train_accuracy,train_loss,test_accuracy,test_loss))
            # writer.add_summary(test_summary, global_step=epoch)
        #saver.save(sess=sess, save_path='./model/dense.ckpt')
    finally:
        # Stop and join the queue threads even if training raised.
        coord.request_stop()
        coord.join(threads)
以上的代码块中都有注释,可以根据代码注释先自己理解一下。
在制作自己的数据集并用神经网络去训练之前,我们肯定已经用神经网络训练过MNIST数据集了。那么现在要用神经网络去训练自己的数据集,主要需要注意以下几点(把代码中导入MNIST数据集的部分注释或者删除掉):
***1、***从之前制作数据集的模块中导入`get_batch_record`方法,为导入数据做准备
from dataset import get_batch_record
***2、***利用`get_batch_record`方法将之前制作好的存放数据集的`.tfrecords`文件中的数据按批取出,用于神经网络的训练
tfrecords_file0 = os.getcwd() + '//outputdata//train.tfrecords'
tfrecords_file1 = os.getcwd() + '//outputdata//test.tfrecords'
image_batch, label_batch = get_batch_record(tfrecords_file0,batch_size)
image_test_batch,label_test_batch = get_batch_record(tfrecords_file1,batch_size)
***3、***接下来就是启动图的运算。在进行迭代运算之前,我们需要先开启一个协调器`tf.train.Coordinator()`用来管理Session中的多线程,之后需要用`tf.train.start_queue_runners`启动队列填充,真正地将Tensor推入到内存序列中,供计算单元调用;如果不启动队列填充的话,内存序列是空的,数据流图会一直处于等待状态
coord = tf.train.Coordinator() # 开启一个协调器
threads=tf.train.start_queue_runners(sess=sess,coord=coord) # 使用start_queue_runners 启动队列填充
***4、***当迭代运算运行完毕后,也就是文件队列中的所有文件都被读取出列了,这时候就应该终止Session中的所有线程了。
coord.request_stop()#发出终止所有线程的命令
coord.join(threads)#把线程加入主线程,等待threads结束
这样我们就可以进行训练了,训练过程如下(由于迭代次数较多,在这里只是展示出训练已经开始了的图):
这样就实现了从自己制作数据集并用神经网络去训练自己制作的数据集这一完整过程。
欢迎大家多多交流!