GoogLeNet
The GoogLeNet network is built mainly from a stack of Inception modules; only one version of the model is listed here.
import torch
import torch.nn as nn
import torch.nn.functional as F
class Inception(nn.Module):
    def __init__(self, in_c, c1, c2, c3, c4):
        '''
        in_c is the number of input channels;
        c1, c2, c3, c4 are the output channels of the four branches
        (c2 and c3 are (reduction, output) channel pairs)
        '''
        super(Inception, self).__init__()
        # Branch 1: a single 1x1 convolution
        self.conv1 = nn.Conv2d(in_channels=in_c, out_channels=c1, kernel_size=1)
        # Branch 2: 1x1 reduction followed by a 3x3 convolution
        self.conv2_1 = nn.Conv2d(in_channels=in_c, out_channels=c2[0], kernel_size=1)
        self.conv2_2 = nn.Conv2d(in_channels=c2[0], out_channels=c2[1], kernel_size=3, padding=1)
        # Branch 3: 1x1 reduction followed by a 5x5 convolution
        self.conv3_1 = nn.Conv2d(in_channels=in_c, out_channels=c3[0], kernel_size=1)
        self.conv3_2 = nn.Conv2d(in_channels=c3[0], out_channels=c3[1], kernel_size=5, padding=2)
        # Branch 4: 3x3 max pooling followed by a 1x1 convolution
        self.maxpool4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.conv4_2 = nn.Conv2d(in_channels=in_c, out_channels=c4, kernel_size=1)

    def forward(self, x):
        p1 = F.relu(self.conv1(x))
        p2 = F.relu(self.conv2_2(F.relu(self.conv2_1(x))))
        p3 = F.relu(self.conv3_2(F.relu(self.conv3_1(x))))
        p4 = F.relu(self.conv4_2(self.maxpool4_1(x)))
        # Concatenate the four branches along the channel dimension
        return torch.cat([p1, p2, p3, p4], dim=1)
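A quick shape check of the module (an illustrative sketch; the channel numbers below are simply those of the first Inception block used in the network further down):

# The first Inception block takes 192 input channels and should return
# 64 + 128 + 32 + 32 = 256 channels, with the spatial size unchanged.
block = Inception(192, 64, (96, 128), (16, 32), 32)
x = torch.randn(1, 192, 28, 28)   # dummy feature map
print(block(x).shape)             # torch.Size([1, 256, 28, 28])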
class GoogLeNet(nn.Module):
    def __init__(self):
        super(GoogLeNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1, stride=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            Inception(192, 64, (96, 128), (16, 32), 32),
            Inception(256, 128, (128, 192), (32, 96), 64),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            Inception(480, 192, (96, 208), (16, 48), 64),
            Inception(512, 160, (112, 224), (24, 64), 64),
            Inception(512, 128, (128, 256), (24, 64), 64),
            Inception(512, 112, (144, 288), (32, 64), 64),
            Inception(528, 256, (160, 320), (32, 128), 128),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            Inception(832, 256, (160, 320), (32, 128), 128),
            Inception(832, 384, (192, 384), (48, 128), 128),
            # Global average pooling: the 7x7 feature map is reduced to 1x1
            nn.AvgPool2d(kernel_size=7, stride=1),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(0.4),
            nn.Linear(1024, 1000),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)   # flatten the 1024 x 1 x 1 map to (N, 1024)
        x = self.classifier(x)
        return x
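A minimal end-to-end check of the model (illustrative; it assumes the standard 224x224 ImageNet input size, so that the final 7x7 average pooling leaves a 1x1 map):

model = GoogLeNet()
img = torch.randn(2, 3, 224, 224)   # a dummy batch of two RGB images
logits = model(img)
print(logits.shape)                 # torch.Size([2, 1000])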
The Inception structure:
The Inception module makes two main structural contributions: first, it uses 1x1 convolutions to increase or reduce the channel dimension; second, it convolves at several kernel sizes in parallel and then aggregates the results.
The role of convolution in this article:
- The role of the 1x1 convolution
  - Combining convolution features of different sizes increases the nonlinearity of the model. For example, a 3x3 convolution followed by its activation can be viewed as a nonlinear function f1(x) = ax² + bx + c, and a 1x1 convolution plus activation as another nonlinear function f2(x) = mx² + nx + q; the composition f2(f1(x)) is clearly more nonlinear than either function alone (see the first sketch after this list).
  - The 1x1 convolution also reduces the computational cost. For an input of shape (192, 32, 32), a direct (192, 3, 3) x 256 convolution needs 256 × 192 × 32 × 32 × 3 × 3 = 452,984,832 multiplications, whereas a 1x1 convolution down to 96 channels followed by the 3x3 convolution needs only 192 × 32 × 32 × 1 × 1 × 96 + 32 × 32 × 3 × 3 × 96 × 256 = 245,366,784 multiplications, a large reduction (see the second sketch after this list).
- Multi-scale convolution followed by aggregation
  The features in a feature map are often sparsely distributed. Because the features are spread out in this way, convolution kernels of different sizes can be used to match the distribution, and their outputs are then aggregated (concatenated) into a single feature map.
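To make the composition argument concrete, here is a small sketch (the channel sizes are illustrative, not from the original text): stacking a 3x3 convolution + ReLU with a 1x1 convolution + ReLU composes two nonlinear mappings, so the result behaves like f2(f1(x)) rather than a single nonlinearity:

# f1: 3x3 convolution + ReLU (one nonlinear mapping)
f1 = nn.Sequential(nn.Conv2d(192, 192, kernel_size=3, padding=1), nn.ReLU())
# f2: 1x1 convolution + ReLU (a second nonlinear mapping, no extra spatial mixing)
f2 = nn.Sequential(nn.Conv2d(192, 256, kernel_size=1), nn.ReLU())

x = torch.randn(1, 192, 32, 32)
y = f2(f1(x))       # composed mapping f2(f1(x)): two nonlinearities instead of one
print(y.shape)      # torch.Size([1, 256, 32, 32])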
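The multiplication counts quoted above can also be reproduced directly (assuming, as in the text, a (192, 32, 32) input, 256 output channels, and a 96-channel 1x1 reduction):

# Direct 3x3 convolution, 192 -> 256 channels on a 32x32 map
direct = 256 * 192 * 32 * 32 * 3 * 3
# 1x1 reduction to 96 channels, then a 3x3 convolution to 256 channels
reduced = 192 * 32 * 32 * 1 * 1 * 96 + 32 * 32 * 3 * 3 * 96 * 256
print(direct, reduced)   # 452984832 245366784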