1. Main Methods
(1) The deep learning framework is PyTorch; residual blocks are built with nn.Sequential, and the overall network is built by subclassing nn.Module.
(2) Every convolution is followed by BatchNorm on the feature maps, which helps prevent vanishing or exploding gradients.
(3) The loss function is CrossEntropyLoss, and the optimizer is Adam.
(4) The model is ResNet50; once built, the model and the data are moved to the GPU for training. A minimal sketch of this setup appears below.
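The following sketch previews the setup described in (1)-(4), using the names and hyperparameters from the full code in Section 4:

model = ResNet50()                       # network defined in Section 3
device = torch.device('cuda:0')          # the index selects which GPU to use
model.to(device)                         # move the model parameters onto the GPU
criterion = nn.CrossEntropyLoss()        # cross-entropy loss
optimizer = optim.Adam(model.parameters(), lr=0.0001)  # Adam optimizer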
2. Image Preprocessing
Because the images in the dog-breed-identification dataset come in different sizes, they must be preprocessed: OpenCV functions resize every image to (3, 224, 224), i.e., a 224*224 three-channel image. The pipeline is shown in Figure 2.1, and the code follows. (Note that the labels in this dataset are breed names, so the CSV file has to be processed separately to replace each breed name with an integer label, which is convenient for later one-hot encoding; a sketch of this mapping appears after the function below.)
def Data_process(id_batch):
    m_batch = len(id_batch)                      # size of one batch
    arr_batch = np.ones((m_batch, 3, 224, 224))  # array that will hold the image data
    X_batch = torch.tensor(arr_batch)            # convert the array to a tensor
    for j in range(len(id_batch)):
        path = 'E:\\Classic network\\data\\train\\' + id_batch[j] + '.jpg'  # path of the j-th image in this batch
        img = cv2.imread(path, 1)                # read the image in color
        img = cv2.resize(img, (224, 224))        # resize to 224*224
        img = img.transpose(2, 0, 1)             # HWC -> CHW (reshape would scramble the channels)
        img = torch.tensor(img)                  # convert to a tensor for storage
        X_batch[j, :, :, :] = img                # store the image
    return X_batch
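The label conversion mentioned above is not shown in the original code; the following is a minimal sketch, assuming the standard Kaggle labels.csv with 'id' and 'breed' columns (overwriting the file is one choice; writing to a separate file works equally well):

import pandas as pd

data = pd.read_csv(r'E:\Classic network\data\labels.csv')
breeds = sorted(data['breed'].unique())               # the 120 breed names
breed_to_idx = {b: i for i, b in enumerate(breeds)}   # breed name -> integer label
data['breed'] = data['breed'].map(breed_to_idx)       # replace names with integers
data.to_csv(r'E:\Classic network\data\labels.csv', index=False)  # save numeric labels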
3. Model Construction
(1) Building the Residual Block
The structure of a residual block is shown in Figure 3.1; each block consists of a left branch and a right branch. The right branch repeats 2D convolution, BatchNormalization, and ReLU activation three times, except that the last repetition omits the activation: the ReLU is applied only after the two branches are added. The left branch either connects the input directly to the output, or first passes the feature map through a 1*1 convolution so its dimensions match the right branch. The code is as follows:
class ResidualBlock(nn.Module):
    # Sub-module: Residual Block
    # Note: the bottleneck width here equals inchannel, a simplification of the
    # canonical ResNet-50 bottleneck (which uses outchannel // 4 as the middle width).
    def __init__(self, inchannel, outchannel, stride, shortcut=None):
        super(ResidualBlock, self).__init__()
        self.right = nn.Sequential(
            nn.Conv2d(inchannel, inchannel, 1, 1, 0, bias=False),
            nn.BatchNorm2d(inchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(inchannel, inchannel, 3, stride, 1, bias=False),
            nn.BatchNorm2d(inchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(inchannel, outchannel, 1, 1, 0, bias=False),
            nn.BatchNorm2d(outchannel)
        )
        self.left = shortcut
    def forward(self, x):
        out = self.right(x)
        residual = x if self.left is None else self.left(x)
        out += residual
        return F.relu(out)
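To see how the shortcut keeps the two branches' shapes compatible, here is an illustrative check (not part of the original code; assumes the imports listed in Section 4):

shortcut = nn.Sequential(
    nn.Conv2d(64, 256, 1, 1, bias=False),    # 1*1 convolution to match channels
    nn.BatchNorm2d(256)
)
block = ResidualBlock(64, 256, stride=1, shortcut=shortcut)
x = torch.randn(2, 64, 56, 56)               # dummy feature map
print(block(x).shape)                        # torch.Size([2, 256, 56, 56])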
(2) Building the Network
First, consider the ResNet50 architecture diagram shown in Figure 3.2.
ResNet50 can be viewed as four types of residual stage: the first type (light blue) contains three residual blocks, the second (light orange) four, the third (yellow) six, and the fourth (pink) three. Once these residual blocks are built as described above, they only need to be connected in a feed-forward chain. The code is as follows:
class ResNet50(nn.Module):
    # Main module: ResNet50
    # ResNet50 consists of several layers, and each layer contains several residual blocks
    # Residual blocks are implemented as a sub-module; layers are built by _make_layer
    def __init__(self, num_classes=120):
        super(ResNet50, self).__init__()
        # stem: initial image transformation
        self.pre = nn.Sequential(
            nn.Conv2d(3, 64, 7, 2, 3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, 2, 1)
        )
        # repeated layers with 3, 4, 6, 3 residual blocks respectively
        self.layer1 = self._make_layer(64, 256, 3, 1)
        self.layer2 = self._make_layer(256, 512, 4, 2)
        self.layer3 = self._make_layer(512, 1024, 6, 2)
        self.layer4 = self._make_layer(1024, 2048, 3, 2)
        # fully connected layer for classification
        self.fc = nn.Linear(2048, num_classes)
    def _make_layer(self, inchannel, outchannel, block_num, stride):
        # build a layer containing several residual blocks
        shortcut = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, 1, stride, bias=False),
            nn.BatchNorm2d(outchannel)
        )
        layers = []
        layers.append(ResidualBlock(inchannel, outchannel, stride, shortcut))
        for i in range(1, block_num):
            layers.append(ResidualBlock(outchannel, outchannel, 1))
        return nn.Sequential(*layers)
    def forward(self, x):
        x = self.pre(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = F.avg_pool2d(x, 7)
        x = x.view(x.size(0), -1)
        return self.fc(x)
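As a quick sanity check (illustrative, assuming the imports listed in Section 4), a 224*224 input should produce one logit per breed:

model = ResNet50(num_classes=120)
x = torch.randn(1, 3, 224, 224)    # one dummy RGB image
print(model(x).shape)              # torch.Size([1, 120])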
4. Complete Code
from torch import nn
import torch
from torch.nn import functional as F
import torch.optim as optim
import cv2
import pandas as pd
import numpy as np
## Conv2d arguments below: (in_channels, out_channels, kernel_size, stride, padding)
class ResidualBlock(nn.Module):
    # Sub-module: Residual Block
    # Note: the bottleneck width here equals inchannel, a simplification of the
    # canonical ResNet-50 bottleneck (which uses outchannel // 4 as the middle width).
    def __init__(self, inchannel, outchannel, stride, shortcut=None):
        super(ResidualBlock, self).__init__()
        self.right = nn.Sequential(
            nn.Conv2d(inchannel, inchannel, 1, 1, 0, bias=False),
            nn.BatchNorm2d(inchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(inchannel, inchannel, 3, stride, 1, bias=False),
            nn.BatchNorm2d(inchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(inchannel, outchannel, 1, 1, 0, bias=False),
            nn.BatchNorm2d(outchannel)
        )
        self.left = shortcut
    def forward(self, x):
        out = self.right(x)
        residual = x if self.left is None else self.left(x)
        out += residual
        return F.relu(out)
## Conv2d arguments below: (in_channels, out_channels, kernel_size, stride, padding)
class ResNet50(nn.Module):
    # Main module: ResNet50
    # ResNet50 consists of several layers, and each layer contains several residual blocks
    # Residual blocks are implemented as a sub-module; layers are built by _make_layer
    def __init__(self, num_classes=120):
        super(ResNet50, self).__init__()
        # stem: initial image transformation
        self.pre = nn.Sequential(
            nn.Conv2d(3, 64, 7, 2, 3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, 2, 1)
        )
        # repeated layers with 3, 4, 6, 3 residual blocks respectively
        self.layer1 = self._make_layer(64, 256, 3, 1)
        self.layer2 = self._make_layer(256, 512, 4, 2)
        self.layer3 = self._make_layer(512, 1024, 6, 2)
        self.layer4 = self._make_layer(1024, 2048, 3, 2)
        # fully connected layer for classification
        self.fc = nn.Linear(2048, num_classes)
    def _make_layer(self, inchannel, outchannel, block_num, stride):
        # build a layer containing several residual blocks
        shortcut = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, 1, stride, bias=False),
            nn.BatchNorm2d(outchannel)
        )
        layers = []
        layers.append(ResidualBlock(inchannel, outchannel, stride, shortcut))
        for i in range(1, block_num):
            layers.append(ResidualBlock(outchannel, outchannel, 1))
        return nn.Sequential(*layers)
    def forward(self, x):
        x = self.pre(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = F.avg_pool2d(x, 7)
        x = x.view(x.size(0), -1)
        return self.fc(x)
model = ResNet50()
device = torch.device('cuda:0')  # change the index to select a different GPU
model.to(device)
#########################################################helper functions#######################################################################
### 1. Image preprocessing
def Data_process(id_batch):
    m_batch = len(id_batch)                      # size of one batch
    arr_batch = np.ones((m_batch, 3, 224, 224))  # array that will hold the image data
    X_batch = torch.tensor(arr_batch)            # convert the array to a tensor
    for j in range(len(id_batch)):
        path = 'E:\\Classic network\\data\\train\\' + id_batch[j] + '.jpg'  # path of the j-th image in this batch
        img = cv2.imread(path, 1)                # read the image in color
        img = cv2.resize(img, (224, 224))        # resize to 224*224
        img = img.transpose(2, 0, 1)             # HWC -> CHW (reshape would scramble the channels)
        img = torch.tensor(img)                  # convert to a tensor for storage
        X_batch[j, :, :, :] = img                # store the image
    return X_batch
######################################################main program#############################################################################
data = pd.read_csv(r'E:\Classic network\data\labels.csv')  # read the CSV file into data
id = data['id'].values                      # the 'id' column
label = np.matrix((data['breed'].values))   # the 'breed' column (integer labels after the CSV preprocessing)
optimizer = optim.Adam(model.parameters(), lr=0.0001)  # Adam optimizer
criterion = nn.CrossEntropyLoss()           # cross-entropy loss
batch = 32   # batch size
iters = 100  # number of epochs (full passes over the data)
for i in range(iters):
    for t in range(int(len(id) / batch)):
        X_batch = Data_process(id[t * batch:(t + 1) * batch])  # preprocess one batch of images
        Y_batch = label[:, t * batch:(t + 1) * batch].A        # matching labels, converted to an array
        X_batch = X_batch.to(torch.float32)                    # cast the image tensor to float32
        Y_batch = np.squeeze(Y_batch)                          # squeeze the labels, then convert to a long tensor
        Y_batch = torch.tensor(Y_batch, dtype=torch.long)
        optimizer.zero_grad()                                  # zero the gradients
        X_batch = model(X_batch.to(device))                    # forward pass on the current batch
        correct = int(((torch.argmax(X_batch, 1) == Y_batch.to(device))).sum()) / batch  # accuracy on this batch
        loss = criterion(X_batch, Y_batch.to(device))          # loss on this batch
        print("iters:%d" % (i + 1), "\t", "process:%0.5f" % ((t / int(len(id) / batch)) * 100), "%", "\t", "loss:%.5f" % loss, "\t", "correct:%0.5f" % (correct * 100), "%")  # progress, loss, accuracy
        loss.backward()     # backward pass
        optimizer.step()    # one parameter update along the gradient
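The original code does not show how the trained weights are persisted. A minimal sketch (the file name is a hypothetical choice):

# save the trained parameters (file name is illustrative)
torch.save(model.state_dict(), 'resnet50_dog_breed.pth')

# later: restore them into a fresh model before inference
model = ResNet50()
model.load_state_dict(torch.load('resnet50_dog_breed.pth'))
model.to(device).eval()   # switch BatchNorm layers to evaluation mode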
5. Training Results
Training accuracy quickly rises above 98%. Training took a long time, and the screenshot of the experimental results has been lost.