RepVGG: Making VGG-style ConvNets Great Again
Paper: https://arxiv.org/pdf/2101.03697.pdf
Code: https://github.com/DingXiaoH/RepVGG
Abstract
The paper presents a simple but powerful convolutional network architecture whose inference-time body is VGG-like, composed of nothing but 3x3 convolutions and ReLU, while the training-time model has a multi-branch topology. This decoupling of the training-time and inference-time architectures is achieved by a structural re-parameterization technique, hence the name RepVGG. On ImageNet, RepVGG reaches over 80% top-1 accuracy, a first for a plain model. On an NVIDIA 1080Ti GPU, RepVGG models run 83% faster than ResNet-50 and 101% faster than ResNet-101, showing a favorable accuracy-speed trade-off compared with state-of-the-art models such as EfficientNet and RegNet.
Main idea of the paper
The paper exploits three facts: 3x3 convolutions are extremely fast, a plain single-path structure is memory-efficient and runs fast at inference, and residual branches prevent vanishing/exploding gradients during training. Combining these with the linearity of convolution and BN (the distributive law lets parallel branches be merged into a single kernel), a simple yet powerful ConvNet is built.
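To make the merging concrete, here is a minimal NumPy sketch (illustrative, not from the RepVGG repo) of the key identity: a BN layer applied after a conv is a per-channel affine map, so it folds into the conv's kernel and bias as W' = gamma / sqrt(var + eps) * W and b' = beta + gamma * (b - mean) / sqrt(var + eps).

import numpy as np

rng = np.random.default_rng(0)
cout, eps = 4, 1e-3
y = rng.normal(size=(8, cout))                 # pretend per-channel conv outputs
gamma, beta = rng.normal(size=cout), rng.normal(size=cout)
mean = rng.normal(size=cout)
var = rng.uniform(0.5, 2.0, size=cout)

bn_out = gamma * (y - mean) / np.sqrt(var + eps) + beta   # what BN computes
scale = gamma / np.sqrt(var + eps)                        # folded scale
fused = scale * y + (beta - scale * mean)                 # one affine op
assert np.allclose(bn_out, fused)                         # identical results

The same linearity is why a sum of parallel conv branches equals one conv whose kernel is the sum of the (suitably zero-padded) branch kernels; the fusion code below applies exactly this.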
RepVGG overall architecture
The RepVGG block
Keras implementation
Below is a Keras implementation based on the paper and the TensorFlow 2.x reference code (it also works with TensorFlow 1.x). Feature maps must be channels-last. Code link
Training part:
import numpy as np
from tensorflow.keras.layers import Conv2D, BatchNormalization, Add, Concatenate

def rep_vgg(self, input, filters, kernel_size, name, dilation_rate=1,
            use_bias=False, use_bn=True, model='Add', padding='same'):
    """
                     |
         -------------------------
         |           |           |
        1x1         kxk      identity
         |           |           |
         BN          BN          BN
         |           |           |
         ----------combine-------
                     |
                  RepVGG
    """
    in_dim = int(input.shape[-1])
    # The identity branch requires matching input/output channel counts.
    assert in_dim == filters
    x = None
    if self.stage == 'train':
        # Training-time multi-branch topology: a 1x1 conv, a kxk conv and an
        # identity branch, each followed by its own BN.
        conv_1x1 = Conv2D(filters, (1, 1), padding=padding, use_bias=use_bias,
                          dilation_rate=(dilation_rate, dilation_rate),
                          name=name + '_conv_1x1')(input)
        conv_kxk = Conv2D(filters, (kernel_size, kernel_size), padding=padding,
                          use_bias=use_bias,
                          dilation_rate=(dilation_rate, dilation_rate),
                          name=name + '_conv_kxk')(input)
        if use_bn:
            conv_1x1 = BatchNormalization(name=name + '_bn_1')(conv_1x1)
            conv_kxk = BatchNormalization(name=name + '_bn_2')(conv_kxk)
            input = BatchNormalization(name=name + '_bn_3')(input)
        if model == 'Add':
            x = Add(name=name + '_add')([input, conv_1x1, conv_kxk])
        elif model == 'Concate':
            x = Concatenate(name=name + '_concate')([input, conv_1x1, conv_kxk])
    else:
        # Inference-time plain topology: a single conv (bias enabled by
        # default) that will later receive the fused weights.
        if model == 'Add':
            x = Conv2D(filters, kernel_size, dilation_rate=dilation_rate,
                       padding=padding, name=name)(input)
        elif model == 'Concate':
            # Concatenating three branches triples the output channels.
            x = Conv2D(3 * filters, kernel_size, dilation_rate=dilation_rate,
                       padding=padding, name=name)(input)
    # Record the block's configuration so its weights can be fused later.
    self.dbb_block_names['rep_vgg'].append([name, use_bias, use_bn, model, None])
    return x
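A minimal usage sketch follows, assuming rep_vgg is defined at module level. The Net wrapper class here is hypothetical; it only supplies the stage and dbb_block_names attributes that the method reads.

from tensorflow.keras import Input, Model

class Net:
    """Hypothetical container for the attributes rep_vgg() expects."""
    def __init__(self, stage='train'):
        self.stage = stage                      # 'train' builds the multi-branch block
        self.dbb_block_names = {'rep_vgg': []}  # metadata collected for later fusion
    rep_vgg = rep_vgg                           # bind the module-level function as a method

net = Net(stage='train')
inputs = Input((32, 32, 16))                    # channels-last, as required
x = net.rep_vgg(inputs, filters=16, kernel_size=3, name='block1')
trained_model = Model(inputs, x)                # train this model as usual

deploy = Net(stage='deploy')                    # any value other than 'train'
inputs2 = Input((32, 32, 16))
infer_model = Model(inputs2, deploy.rep_vgg(inputs2, filters=16,
                                            kernel_size=3, name='block1'))
# After training, fuse the weights with fusion_rep_vgg() below:
# fusion_rep_vgg(net.dbb_block_names['rep_vgg'], trained_model, infer_model)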
Fusion part:
def fusion_rep_vgg(AC_names, trained_model, infer_model):
    """Fuse each trained multi-branch RepVGG block (see the diagram in
    rep_vgg above) into the single conv layer of the inference model."""
    eps = 1e-3  # must match Keras BatchNormalization's default epsilon
    for layer_name, use_bias, use_bn, model, epoch in AC_names:
        conv_kxk_weights = trained_model.get_layer(layer_name + '_conv_kxk').get_weights()[0]
        conv_1x1_weights = trained_model.get_layer(layer_name + '_conv_1x1').get_weights()[0]
        if use_bias:
            conv_kxk_bias = trained_model.get_layer(layer_name + '_conv_kxk').get_weights()[1]
            conv_1x1_bias = trained_model.get_layer(layer_name + '_conv_1x1').get_weights()[1]
        else:
            conv_kxk_bias = np.zeros((conv_kxk_weights.shape[-1],))
            conv_1x1_bias = np.zeros((conv_1x1_weights.shape[-1],))
        if use_bn:
            gammas_1x1, betas_1x1, means_1x1, var_1x1 = trained_model.get_layer(layer_name + '_bn_1').get_weights()
            gammas_kxk, betas_kxk, means_kxk, var_kxk = trained_model.get_layer(layer_name + '_bn_2').get_weights()
            gammas_res, betas_res, means_res, var_res = trained_model.get_layer(layer_name + '_bn_3').get_weights()
        else:
            # Without BN, use identity statistics: gamma=1, beta=0, mean=0, var=1.
            n = conv_1x1_weights.shape[-1]
            gammas_1x1, betas_1x1, means_1x1, var_1x1 = np.ones(n), np.zeros(n), np.zeros(n), np.ones(n)
            gammas_kxk, betas_kxk, means_kxk, var_kxk = np.ones(n), np.zeros(n), np.zeros(n), np.ones(n)
            gammas_res, betas_res, means_res, var_res = np.ones(n), np.zeros(n), np.zeros(n), np.ones(n)
        # Fold BN into the kxk kernel: W' = gamma / sqrt(var + eps) * W.
        w_kxk = (gammas_kxk / np.sqrt(var_kxk + eps)) * conv_kxk_weights
        kernel_size = w_kxk.shape[0]
        in_channels = w_kxk.shape[2]
        # Embed the 1x1 kernel at the centre of a zero-padded kxk kernel.
        w_1x1 = np.zeros_like(w_kxk)
        w_1x1[kernel_size // 2, kernel_size // 2, :, :] = \
            (gammas_1x1 / np.sqrt(var_1x1 + eps)) * conv_1x1_weights[0, 0]
        # The identity branch is a kxk conv whose centre slice is the identity matrix.
        w_res = np.zeros_like(w_kxk)
        for i in range(in_channels):
            w_res[kernel_size // 2, kernel_size // 2, i, i] = 1
        w_res = (gammas_res / np.sqrt(var_res + eps)) * w_res
        # Fold BN into the biases: b' = (b - mean) * gamma / sqrt(var + eps) + beta.
        b_1x1 = ((conv_1x1_bias - means_1x1) * gammas_1x1) / np.sqrt(var_1x1 + eps) + betas_1x1
        b_kxk = ((conv_kxk_bias - means_kxk) * gammas_kxk) / np.sqrt(var_kxk + eps) + betas_kxk
        b_res = ((0 - means_res) * gammas_res) / np.sqrt(var_res + eps) + betas_res
        weight = [w_res, w_1x1, w_kxk]
        bias = [b_res, b_1x1, b_kxk]
        # diff_model merges the three fused branches according to the combine mode.
        infer_model.get_layer(layer_name).set_weights(diff_model(model, weight, bias))
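The diff_model helper is not included in this excerpt. A plausible minimal reconstruction (an assumption, not the repo's actual code) follows from how the branches are combined at training time: elementwise addition means the fused kernels and biases simply sum, while concatenation stacks them along the output-channel axis in the same branch order ([identity, 1x1, kxk]).

def diff_model(model, weight, bias):
    """Hypothetical reconstruction: merge the per-branch fused kernels and
    biases into the single conv's [kernel, bias] weight list."""
    if model == 'Add':
        # A sum of conv outputs equals one conv with summed kernels/biases.
        return [np.sum(weight, axis=0), np.sum(bias, axis=0)]
    elif model == 'Concate':
        # Concatenated branch outputs -> stack along the output-channel axis.
        return [np.concatenate(weight, axis=-1), np.concatenate(bias, axis=-1)]
    raise ValueError('unknown combine mode: ' + str(model))

After fusion, infer_model should reproduce trained_model's outputs up to floating-point error, e.g. np.allclose(trained_model.predict(x), infer_model.predict(x), atol=1e-4), which is the whole point of structural re-parameterization.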
Disclaimer: this content was collected from the Internet and copyright belongs to the original authors; the figures come from the original paper. In case of infringement, please contact us for removal.