Preface
DenseNet is said to outperform ResNet: it uses fewer parameters, delivers better performance, and the idea behind it is simple.
On the surface it looks like ordinary skip connections, but in fact it concatenates feature maps instead of adding them.
I. What is DenseNet?
Let's start with a figure.
The figure above illustrates the computation inside a Dense Block. Unlike ResNet, which adds the result of each convolution to its input element-wise, DenseNet stacks the feature maps produced at every step. For an ordinary convolutional network, the computation rule is

x_l = H_l(x_{l-1})

whereas in DenseNet every layer receives the feature maps of all preceding layers:

x_l = H_l([x_0, x_1, ..., x_{l-1}])

in other words, the feature maps are reused. Once this is clear, an obvious problem follows: stacking feature maps this way makes the number of channels grow very quickly, and since every feature map inside a block must keep the same spatial size as the block input, the network would end up with far too many parameters. The authors therefore introduced the transition_block, shown in the red box in the figure below:
A transition_block contains a 1x1 convolution to reduce the number of channels and a 2x2 average pooling to shrink the spatial size of the feature maps. Inside a single Dense Block the spatial size of the feature maps stays constant; channels and resolution are only reduced when the data passes through a transition block on its way to the next Dense Block. For example, with compression = 0.5 a 56x56x256 feature map becomes 56x56x128 after the 1x1 convolution and 28x28x128 after the pooling. That is the whole idea behind the Dense Block.
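Before moving on to the implementation, here is a minimal sketch (an illustration, not code from the original post) contrasting the two merge styles in Keras: ResNet adds feature maps element-wise, while DenseNet concatenates them along the channel axis.

from keras.layers import Input, Conv2D, Add, Concatenate

inp = Input(shape=(32, 32, 64))
feat = Conv2D(64, (3, 3), padding='same')(inp)

res_out = Add()([inp, feat])                  # ResNet-style merge: (None, 32, 32, 64)
dense_out = Concatenate(axis=3)([inp, feat])  # DenseNet-style merge: (None, 32, 32, 128)
print(res_out.shape, dense_out.shape)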
II. Reproducing it in Keras
1.Conv Block
This is the block that performs the convolution computations inside a Dense Block. The meaning of each parameter is documented in the code comments.
The code is as follows (example):
def conv_block(x, stage, branch, nb_filter, dropout_rate=None):
    # Arguments
    # x: input tensor
    # stage: index of the dense block
    # branch: layer index within the dense block
    # nb_filter: number of filters
    # dropout_rate: dropout rate
    eps = 1.1e-5
    bn_axis = 3
    conv_name_base = 'conv' + str(stage) + '_' + str(branch)
    relu_name_base = 'relu' + str(stage) + '_' + str(branch)
    inter_channels = 4 * nb_filter

    # 1x1 (bottleneck) convolution that adjusts the channel count of the input tensor
    x = BatchNormalization(epsilon=eps, axis=bn_axis, name=conv_name_base + '_x1_bn')(x)
    x = Activation('relu', name=relu_name_base + '_x1')(x)
    x = Conv2D(inter_channels, (1, 1), name=conv_name_base + '_x1', use_bias=False)(x)
    print("x in conv: ", x.shape)
    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    # 3x3 convolution that produces the new feature maps
    x = BatchNormalization(epsilon=eps, axis=bn_axis, name=conv_name_base + '_x2_bn')(x)
    x = Activation('relu', name=relu_name_base + '_x2')(x)
    x = ZeroPadding2D((1, 1), name=conv_name_base + '_x2_zeropadding')(x)
    x = Conv2D(nb_filter, (3, 3), name=conv_name_base + '_x2', use_bias=False)(x)
    print("x in conv2: ", x.shape)
    if dropout_rate:
        x = Dropout(dropout_rate)(x)
    return x
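A quick way to check what conv_block produces (a sketch that assumes the Keras imports from the complete listing further down): each call keeps the spatial size of the input and returns nb_filter new feature maps.

from keras.layers import Input

inp = Input(shape=(56, 56, 64))
out = conv_block(inp, stage=2, branch=1, nb_filter=32)
print(out.shape)  # (None, 56, 56, 32): spatial size preserved, 32 new feature maps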
2.Dense Block
The Dense Block is declared by chaining the conv_block defined above.
def dense_block(x, stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, grow_nb_filters=True):
    # Build a dense_block where the output of each conv_block is fed to subsequent ones
    # Arguments
    # x: input tensor
    # stage: index of the dense block
    # nb_layers: number of conv_block layers to append to the model
    # nb_filter: number of filters
    # growth_rate: growth rate
    # dropout_rate: dropout rate
    # grow_nb_filters: flag deciding whether the number of filters is allowed to grow
    concat_feat = x
    for i in range(nb_layers):
        branch = i + 1
        x = conv_block(concat_feat, stage, branch, growth_rate, dropout_rate)
        concat_feat = layers.Concatenate(axis=3, name='concat_' + str(stage) + '_' + str(branch))([concat_feat, x])
        if grow_nb_filters:
            nb_filter += growth_rate
    return concat_feat, nb_filter
The growth rate is the number of feature maps each conv_block adds, and the nb_filter value returned by dense_block tells the next stage how many channels it will receive. The authors keep the growth rate small precisely so that the number of filters in the deeper parts of the network stays under control.
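As a quick sanity check of this channel bookkeeping (a sketch that assumes the imports from the complete listing below, using the numbers of the first block of DenseNet-121):

from keras.layers import Input

inp = Input(shape=(56, 56, 64))
out, channels = dense_block(inp, stage=2, nb_layers=6, nb_filter=64, growth_rate=32)
print(out.shape)   # (None, 56, 56, 256): 64 input channels plus 6 * 32 new ones
print(channels)    # 256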
3.Transition Block
Used to shrink the feature maps.
def transition_block(x, stage, nb_filter, compression=1.0, dropout_rate=None):
    # The concatenations inside a dense block leave too many feature maps; this block shrinks them
    # x: input tensor
    # stage: index of the transition block
    # nb_filter: number of filters
    # compression: calculated as 1 - reduction; reduces the number of feature maps in the transition block
    # dropout_rate: dropout rate
    eps = 1.1e-5
    bn_axis = 3
    conv_name_base = 'conv' + str(stage) + '_blk'
    relu_name_base = 'relu' + str(stage) + '_blk'
    pool_name_base = 'pool' + str(stage)

    x = BatchNormalization(epsilon=eps, axis=bn_axis, name=conv_name_base + '_bn')(x)
    x = Activation('relu', name=relu_name_base)(x)
    x = Conv2D(int(nb_filter * compression), (1, 1), name=conv_name_base, use_bias=False)(x)
    if dropout_rate:
        x = Dropout(dropout_rate)(x)
    print(x.shape)
    x = AveragePooling2D((2, 2), strides=(2, 2), name=pool_name_base)(x)
    return x
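For example (again assuming the imports from the complete listing below), feeding a 56x56x256 tensor through the transition block with compression=0.5 halves both the channel count and the spatial size:

from keras.layers import Input

inp = Input(shape=(56, 56, 256))
out = transition_block(inp, stage=2, nb_filter=256, compression=0.5)
print(out.shape)  # (None, 28, 28, 128)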
Taking DenseNet-121 as an example (the layer counts for DenseNet-169 and DenseNet-201 are left as comments in the code):
Complete code:
from keras import layers
from keras.models import Model
from keras.layers import Input, ZeroPadding2D
from keras.layers import Dense, Dropout, Activation
from keras.layers import Conv2D
from keras.layers import AveragePooling2D, GlobalAveragePooling2D, MaxPooling2D
from keras.layers import BatchNormalization
import tensorflow as tf
def conv_block(x, stage, branch, nb_filter, dropout_rate=None):
    # Arguments
    # x: input tensor
    # stage: index of the dense block
    # branch: layer index within the dense block
    # nb_filter: number of filters
    # dropout_rate: dropout rate
    eps = 1.1e-5
    bn_axis = 3
    conv_name_base = 'conv' + str(stage) + '_' + str(branch)
    relu_name_base = 'relu' + str(stage) + '_' + str(branch)
    inter_channels = 4 * nb_filter

    # 1x1 (bottleneck) convolution that adjusts the channel count of the input tensor
    x = BatchNormalization(epsilon=eps, axis=bn_axis, name=conv_name_base + '_x1_bn')(x)
    x = Activation('relu', name=relu_name_base + '_x1')(x)
    x = Conv2D(inter_channels, (1, 1), name=conv_name_base + '_x1', use_bias=False)(x)
    print("x in conv: ", x.shape)
    if dropout_rate:
        x = Dropout(dropout_rate)(x)

    # 3x3 convolution that produces the new feature maps
    x = BatchNormalization(epsilon=eps, axis=bn_axis, name=conv_name_base + '_x2_bn')(x)
    x = Activation('relu', name=relu_name_base + '_x2')(x)
    x = ZeroPadding2D((1, 1), name=conv_name_base + '_x2_zeropadding')(x)
    x = Conv2D(nb_filter, (3, 3), name=conv_name_base + '_x2', use_bias=False)(x)
    print("x in conv2: ", x.shape)
    if dropout_rate:
        x = Dropout(dropout_rate)(x)
    return x
def transition_block(x, stage, nb_filter, compression=1.0, dropout_rate=None):
    # The concatenations inside a dense block leave too many feature maps; this block shrinks them
    # x: input tensor
    # stage: index of the transition block
    # nb_filter: number of filters
    # compression: calculated as 1 - reduction; reduces the number of feature maps in the transition block
    # dropout_rate: dropout rate
    eps = 1.1e-5
    bn_axis = 3
    conv_name_base = 'conv' + str(stage) + '_blk'
    relu_name_base = 'relu' + str(stage) + '_blk'
    pool_name_base = 'pool' + str(stage)

    x = BatchNormalization(epsilon=eps, axis=bn_axis, name=conv_name_base + '_bn')(x)
    x = Activation('relu', name=relu_name_base)(x)
    x = Conv2D(int(nb_filter * compression), (1, 1), name=conv_name_base, use_bias=False)(x)
    if dropout_rate:
        x = Dropout(dropout_rate)(x)
    print(x.shape)
    x = AveragePooling2D((2, 2), strides=(2, 2), name=pool_name_base)(x)
    return x
def dense_block(x, stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, grow_nb_filters=True):
    # Build a dense_block where the output of each conv_block is fed to subsequent ones
    # Arguments
    # x: input tensor
    # stage: index of the dense block
    # nb_layers: number of conv_block layers to append to the model
    # nb_filter: number of filters
    # growth_rate: growth rate
    # dropout_rate: dropout rate
    # grow_nb_filters: flag deciding whether the number of filters is allowed to grow
    concat_feat = x
    for i in range(nb_layers):
        branch = i + 1
        x = conv_block(concat_feat, stage, branch, growth_rate, dropout_rate)
        concat_feat = layers.Concatenate(axis=3, name='concat_' + str(stage) + '_' + str(branch))([concat_feat, x])
        if grow_nb_filters:
            nb_filter += growth_rate
    return concat_feat, nb_filter
def DenseNet(nb_dense_block=4, growth_rate=32, nb_filter=64, reduction=0.0, dropout_rate=0.0, weight_decay=1e-4, classes=1000, weights_path=None):
    '''Instantiate the DenseNet-121 architecture.
    # Arguments
        nb_dense_block: number of dense blocks to add to end
        growth_rate: number of filters added by each conv_block
        nb_filter: initial number of filters
        reduction: reduction factor of transition blocks
        dropout_rate: dropout rate
        weight_decay: weight decay factor
        classes: optional number of classes to classify images
        weights_path: path to pre-trained weights
    # Returns
        A Keras model instance.
    '''
    eps = 1.1e-5

    # compute compression factor
    compression = 1.0 - reduction

    # channels-last ordering is assumed (concatenate along the last axis)
    global concat_axis
    concat_axis = 3

    nb_filter = 64
    nb_layers = [6, 12, 24, 16]      # For DenseNet-121
    # nb_layers = [6, 12, 32, 32]    # For DenseNet-169
    # nb_layers = [6, 12, 48, 32]    # For DenseNet-201

    img_input = Input(shape=(224, 224, 3), name='data')

    # Initial convolution: 7x7 with stride 2 (the DenseNet-121 stem), then 3x3 max pooling
    x = ZeroPadding2D((3, 3), name='conv1_zeropadding')(img_input)
    x = Conv2D(nb_filter, (7, 7), strides=(2, 2), name='conv1', use_bias=False)(x)
    x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv1_bn')(x)
    x = Activation('relu', name='relu1')(x)
    x = ZeroPadding2D((1, 1), name='pool1_zeropadding')(x)
    x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1')(x)
    print("first", x.shape)

    # Add dense blocks, each followed by a transition block
    for block_idx in range(nb_dense_block - 1):
        stage = block_idx + 2
        x, nb_filter = dense_block(x, stage, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate)
        x = transition_block(x, stage, nb_filter, compression=compression, dropout_rate=dropout_rate)
        nb_filter = int(nb_filter * compression)

    # The last dense block is not followed by a transition block
    final_stage = stage + 1
    x, nb_filter = dense_block(x, final_stage, nb_layers[-1], nb_filter, growth_rate, dropout_rate=dropout_rate)

    x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv' + str(final_stage) + '_blk_bn')(x)
    x = Activation('relu', name='relu' + str(final_stage) + '_blk')(x)
    x = GlobalAveragePooling2D(name='pool' + str(final_stage))(x)

    x = Dense(classes, name='fc6')(x)
    x = Activation('softmax', name='prob')(x)

    model = Model(img_input, x, name='densenet')

    if weights_path is not None:
        model.load_weights(weights_path)

    return model
if __name__ == '__main__':
    # quick smoke test of the transition block on a random channels-last tensor
    img = tf.random.uniform([1, 256, 256, 3])
    x = transition_block(img, 1, 32)
    print(x.shape)

    model = DenseNet()
    model.summary()
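A minimal sketch of how the model might then be used for training (the reduction/dropout values and the data are assumptions for illustration, not from the original post):

model = DenseNet(reduction=0.5, dropout_rate=0.2, classes=10)
model.compile(optimizer='sgd',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# model.fit(x_train, y_train, batch_size=32, epochs=10)  # x_train / y_train are hypothetical data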
Summary
The DenseNet trick should also work well when grafted onto other network architectures, which is why I reproduced it here.