VGG was proposed in 2014 by the Visual Geometry Group at the University of Oxford; paper: Very Deep Convolutional Networks for Large-Scale Image Recognition.
Highlight of the paper: stacking multiple 3×3 convolution kernels in place of a single large kernel (reducing the number of parameters).
The paper points out that a stack of two 3×3 kernels replaces one 5×5 kernel, and a stack of three 3×3 kernels replaces one 7×7 kernel, while keeping the same receptive field. A concrete parameter-count comparison is sketched below.
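To make the savings concrete, here is a minimal sketch of my own (assuming C = 512 input and output channels, biases disabled): three stacked 3×3 convolutions cost 3 × (3·3·C·C) = 27C² weights, versus 7·7·C·C = 49C² for a single 7×7 convolution.

import torch.nn as nn

C = 512  # channel count chosen for illustration only
stacked = nn.Sequential(*[nn.Conv2d(C, C, kernel_size=3, padding=1, bias=False)
                          for _ in range(3)])
single = nn.Conv2d(C, C, kernel_size=7, padding=3, bias=False)

count = lambda m: sum(p.numel() for p in m.parameters())
print(count(stacked))  # 3 * 9 * 512 * 512 = 7,077,888  (27 * C^2)
print(count(single))   # 49 * 512 * 512 = 12,845,056    (49 * C^2)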
Background concept: the CNN receptive field
In a CNN, the receptive field is the size of the region in the input layer that a single element of a given layer's output corresponds to.
$F(i) = (F(i+1) - 1) \times S + K_{size}$, where $F(i)$ is the receptive field of layer $i$, $S$ is the stride, and $K_{size}$ is the kernel size.
For example, starting from $F = 1$ at the output and working backwards through three stacked 3×3 convolutions (stride 1), the receptive field is 7×7:
$Conv3\times 3(3): F = (1-1)\times 1 + 3 = 3$
$Conv3\times 3(2): F = (3-1)\times 1 + 3 = 5$
$Conv3\times 3(1): F = (5-1)\times 1 + 3 = 7$
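The recursion is easy to check in code. This is a small sketch; receptive_field is a helper name of my own, not something from the paper:

def receptive_field(layers):
    """layers: list of (kernel_size, stride), ordered from input to output."""
    F = 1  # one element of the final output
    for k, s in reversed(layers):
        F = (F - 1) * s + k
    return F

# Three stacked 3x3, stride-1 convolutions -> 7x7 receptive field,
# matching the single 7x7 kernel they replace.
print(receptive_field([(3, 1), (3, 1), (3, 1)]))  # 7
print(receptive_field([(3, 1), (3, 1)]))          # 5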
PyTorch implementation (based on torchvision's vgg.py; the imports and the model_urls table below are added so the snippet runs standalone):

import torch
import torch.nn as nn
from torch.hub import load_state_dict_from_url

# Pretrained checkpoint URLs as published by torchvision
# (hashes may differ across torchvision versions).
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}
class VGG(nn.Module):
    def __init__(self, features, num_classes=1000, init_weights=True):
        super(VGG, self).__init__()
        self.features = features                     # convolutional backbone from make_layers
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))  # force a 7x7 spatial map before the classifier
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # flatten all dims except batch
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
def make_layers(cfg, batch_norm=False):
    # Build the convolutional backbone from a config list: an int means a
    # 3x3 conv with that many output channels, 'M' means 2x2 max pooling.
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)
# Layer configurations from the paper: A = VGG11, B = VGG13, D = VGG16, E = VGG19.
cfgs = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
def _vgg(arch, cfg, batch_norm, pretrained, progress, **kwargs):
    if pretrained:
        kwargs['init_weights'] = False  # pretrained weights will overwrite any init
    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls[arch], progress=progress)
        model.load_state_dict(state_dict)
    return model
def vgg11(pretrained=False, progress=True, **kwargs):
    return _vgg('vgg11', 'A', False, pretrained, progress, **kwargs)

def vgg11_bn(pretrained=False, progress=True, **kwargs):
    return _vgg('vgg11_bn', 'A', True, pretrained, progress, **kwargs)

def vgg13(pretrained=False, progress=True, **kwargs):
    return _vgg('vgg13', 'B', False, pretrained, progress, **kwargs)

def vgg13_bn(pretrained=False, progress=True, **kwargs):
    return _vgg('vgg13_bn', 'B', True, pretrained, progress, **kwargs)

def vgg16(pretrained=False, progress=True, **kwargs):
    return _vgg('vgg16', 'D', False, pretrained, progress, **kwargs)

def vgg16_bn(pretrained=False, progress=True, **kwargs):
    return _vgg('vgg16_bn', 'D', True, pretrained, progress, **kwargs)

def vgg19(pretrained=False, progress=True, **kwargs):
    return _vgg('vgg19', 'E', False, pretrained, progress, **kwargs)

def vgg19_bn(pretrained=False, progress=True, **kwargs):
    return _vgg('vgg19_bn', 'E', True, pretrained, progress, **kwargs)
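A quick smoke test of the code above (my own usage sketch, assuming the snippet has been run; a randomly initialized vgg16 on a dummy 224×224 batch, where the printed shapes follow from five 2×2 poolings):

model = vgg16(num_classes=1000)
x = torch.randn(2, 3, 224, 224)   # dummy batch of two RGB images
out = model(x)
print(out.shape)                  # torch.Size([2, 1000])
print(model.features(x).shape)    # torch.Size([2, 512, 7, 7]), i.e. 224 / 2^5 = 7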