I originally planned to classify CIFAR-10 with AlexNet, but the images in this dataset are only 32*32, so following AlexNet's architecture exactly would shrink the feature maps down to nothing. I therefore started by slightly adjusting AlexNet's layer parameters and running that; even with GPU acceleration each epoch took nearly five minutes, and after 20 epochs the test accuracy was still hovering around 60%.
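(For a concrete sense of the shrinkage: with the standard AlexNet stem, an 11*11 convolution with stride 4 and padding 2, a 32*32 input is already reduced to (32-11+4)/4+1 = 7, i.e. 7*7, after the very first layer, and the following 3*3/stride-2 max pool leaves only 3*3, far too small for the remaining convolution and pooling stages.)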
So I changed the architecture. Because the pixel dimensions are small, there are probably not that many local features to extract, and I suspected a very deep convolutional stack would not help feature extraction; I therefore cut the number of convolution kernels substantially and kept only two convolution-plus-pooling blocks.
The PyTorch code for this part is as follows:
nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5, stride=1, padding=2, bias=False),  # (32-5+4)/1+1=32
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, padding=0),  # (32-3)/2+1=15
nn.Conv2d(in_channels=64, out_channels=64, kernel_size=5, stride=1, padding=2, bias=False),  # (15-5+4)/1+1=15
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2, padding=0),  # (15-3)/2+1=7
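As a quick sanity check on the size arithmetic in the comments above, a dummy batch can be pushed through the feature extractor. This is a minimal standalone sketch of that check, copying the two blocks shown above:

import torch
from torch import nn

feature_extraction = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=5, stride=1, padding=2, bias=False),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(64, 64, kernel_size=5, stride=1, padding=2, bias=False),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=3, stride=2),
)

dummy = torch.rand(1, 3, 32, 32)        # one CIFAR-10-sized image
print(feature_extraction(dummy).shape)  # torch.Size([1, 64, 7, 7])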
After the two convolution-plus-pooling blocks extract the features, fully connected layers handle the classification; I also adjusted the layer sizes here to match the output shape of the previous stage.
self.feature_classify = torch.nn.Sequential(
    nn.Dropout(p=0.25),
    nn.Linear(in_features=64 * 7 * 7, out_features=384),
    nn.Dropout(p=0.25),
    nn.Linear(in_features=384, out_features=192),
    nn.Linear(in_features=192, out_features=10),
)
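One note on in_features=64 * 7 * 7: it is hand-derived from the shape comments above and has to be recomputed whenever the conv stack changes. A small alternative (my own sketch, not part of the original code) is to infer the flattened size with a dummy forward pass inside __init__:

# Inside __init__, after self.feature_extraction is defined:
with torch.no_grad():
    dummy = torch.zeros(1, 3, 32, 32)                # CIFAR-10 input shape
    n_flat = self.feature_extraction(dummy).numel()  # 64 * 7 * 7 = 3136 here
self.feature_classify = torch.nn.Sequential(
    nn.Dropout(p=0.25),
    nn.Linear(in_features=n_flat, out_features=384),
    nn.Dropout(p=0.25),
    nn.Linear(in_features=384, out_features=192),
    nn.Linear(in_features=192, out_features=10),
)

forward would then flatten with x.view(x.size(0), -1), so the hardcoded 64 * 7 * 7 disappears entirely.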
The prediction results of this heavily modified CNN are as follows:
Using GPU acceleration
Files already downloaded and verified
Files already downloaded and verified
epoch:1,acc:0.2820,loss:768.9158
epoch:1,test_acc:0.3766,test_loss:135.6541
epoch:2,acc:0.4303,loss:620.1400
epoch:2,test_acc:0.4483,test_loss:122.1497
epoch:3,acc:0.4923,loss:555.9510
epoch:3,test_acc:0.5335,test_loss:105.0851
epoch:4,acc:0.5407,loss:509.0777
epoch:4,test_acc:0.5692,test_loss:95.4788
epoch:5,acc:0.5817,loss:466.2306
epoch:5,test_acc:0.5987,test_loss:90.4952
epoch:6,acc:0.6119,loss:435.5047
epoch:6,test_acc:0.6311,test_loss:84.1474
epoch:7,acc:0.6415,loss:406.7112
epoch:7,test_acc:0.6599,test_loss:77.7228
epoch:8,acc:0.6604,loss:383.8425
epoch:8,test_acc:0.6669,test_loss:75.7800
epoch:9,acc:0.6766,loss:365.9761
epoch:9,test_acc:0.6894,test_loss:72.6862
epoch:10,acc:0.6944,loss:347.8030
epoch:10,test_acc:0.7098,test_loss:67.5863
epoch:11,acc:0.7059,loss:335.6827
epoch:11,test_acc:0.7047,test_loss:68.2921
epoch:12,acc:0.7164,loss:323.1924
epoch:12,test_acc:0.7129,test_loss:67.2043
epoch:13,acc:0.7281,loss:310.9101
epoch:13,test_acc:0.7149,test_loss:68.1935
epoch:14,acc:0.7335,loss:304.4421
epoch:14,test_acc:0.7292,test_loss:63.6596
epoch:15,acc:0.7413,loss:294.5465
epoch:15,test_acc:0.7269,test_loss:64.1225
epoch:16,acc:0.7502,loss:286.1364
epoch:16,test_acc:0.7096,test_loss:66.8780
epoch:17,acc:0.7562,loss:279.5722
epoch:17,test_acc:0.7455,test_loss:60.0752
epoch:18,acc:0.7588,loss:274.7319
epoch:18,test_acc:0.7461,test_loss:59.7839
epoch:19,acc:0.7670,loss:265.4509
epoch:19,test_acc:0.7527,test_loss:57.7525
epoch:20,acc:0.7703,loss:261.4555
epoch:20,test_acc:0.7368,test_loss:62.2542
epoch:21,acc:0.7752,loss:255.8207
epoch:21,test_acc:0.7434,test_loss:59.8888
epoch:22,acc:0.7802,loss:250.5966
epoch:22,test_acc:0.7469,test_loss:58.5952
epoch:23,acc:0.7857,loss:245.0714
epoch:23,test_acc:0.7461,test_loss:60.0502
epoch:24,acc:0.7886,loss:240.5701
epoch:24,test_acc:0.7574,test_loss:57.4417
epoch:25,acc:0.7923,loss:235.4422
epoch:25,test_acc:0.7467,test_loss:59.2719
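Training accuracy climbs to about 79% while test accuracy plateaus around 74-75%, a clear improvement over the roughly 60% of the first attempt. The script below imports matplotlib but never uses it; a minimal sketch for plotting these curves, assuming the per-epoch accuracies are collected into two lists (train_accs and test_accs are my own names), would be:

import matplotlib.pyplot as plt

epochs = range(1, EPOCH + 1)
plt.plot(epochs, train_accs, label='train acc')  # e.g. [0.2820, 0.4303, ...]
plt.plot(epochs, test_accs, label='test acc')    # e.g. [0.3766, 0.4483, ...]
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()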
The full source code of the project is as follows:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

EPOCH = 25

if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using GPU acceleration")
else:
    device = torch.device("cpu")
    print("GPU not available, falling back to CPU")

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_set = datasets.CIFAR10('../pytorch_', train=True, download=True, transform=transform)
test_set = datasets.CIFAR10('../pytorch_', train=False, download=True, transform=transform)

# per batch: x is torch.Size([128, 3, 32, 32]), y is torch.Size([128])
train_data = DataLoader(train_set, batch_size=128, shuffle=True)
test_data = DataLoader(test_set, batch_size=128, shuffle=True)


class AlexNet(torch.nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.feature_extraction = torch.nn.Sequential(
            # The AlexNet-style stack tried first, kept here for reference:
            # nn.Conv2d(in_channels=3, out_channels=96, kernel_size=3, stride=2, padding=2, bias=False),  # (32-3+4)/2+1=17
            # nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=3, stride=1, padding=0),  # (17-3)/1+1=15
            # nn.Conv2d(in_channels=96, out_channels=192, kernel_size=5, stride=1, padding=2, bias=False),  # (15-5+4)/1+1=15
            # nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=3, stride=1, padding=0),  # (15-3)/1+1=13
            # nn.Conv2d(in_channels=192, out_channels=384, kernel_size=3, stride=1, padding=1, bias=False),  # (13-3+2)/1+1=13
            # nn.ReLU(inplace=True),
            # nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False),  # (13-3+2)/1+1=13
            # nn.ReLU(inplace=True),
            # nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False),  # (13-3+2)/1+1=13
            # nn.ReLU(inplace=True),
            # nn.MaxPool2d(kernel_size=3, stride=2, padding=0),  # (13-3)/2+1=6
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5, stride=1, padding=2, bias=False),  # (32-5+4)/1+1=32
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),  # (32-3)/2+1=15
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=5, stride=1, padding=2, bias=False),  # (15-5+4)/1+1=15
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),  # (15-3)/2+1=7
        )
        self.feature_classify = torch.nn.Sequential(
            nn.Dropout(p=0.25),
            nn.Linear(in_features=64 * 7 * 7, out_features=384),
            nn.Dropout(p=0.25),
            nn.Linear(in_features=384, out_features=192),
            nn.Linear(in_features=192, out_features=10),
        )

    def forward(self, x):
        x = self.feature_extraction(x)
        x = x.view(x.size(0), 64 * 7 * 7)  # flatten to (batch, 3136)
        x = self.feature_classify(x)
        return x


net = AlexNet()
net = net.to(device)
optimizer = torch.optim.SGD(net.parameters(), lr=0.02)
loss_func = torch.nn.CrossEntropyLoss()


def train(model, epoch):
    model.train()
    loss_all = 0
    acc_all = 0
    for i, (data, target) in enumerate(train_data):
        data, target = data.to(device), target.to(device)
        out = model(data)
        loss = loss_func(out, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        _, pre = out.max(1)  # predicted class = index of the max logit
        train_correct = (pre == target).sum().float()
        acc = train_correct / data.shape[0]
        loss_all += float(loss)
        acc_all += float(acc)
    print('epoch:{},acc:{:.4f},loss:{:.4f}'.format(epoch + 1, acc_all / len(train_data), loss_all))


def test(model, epoch):
    model.eval()
    loss_all = 0
    acc_all = 0
    with torch.no_grad():  # no gradients needed during evaluation
        for i, (data, target) in enumerate(test_data):
            data, target = data.to(device), target.to(device)
            out = model(data)
            loss = loss_func(out, target)
            _, pre = out.max(1)
            test_correct = (pre == target).sum().float()
            acc = test_correct / data.shape[0]
            loss_all += float(loss)
            acc_all += float(acc)
    print('epoch:{},test_acc:{:.4f},test_loss:{:.4f}'.format(epoch + 1, acc_all / len(test_data), loss_all))


def main():
    for i in range(EPOCH):
        train(net, i)
        test(net, i)


if __name__ == '__main__':
    main()

# Scratch code used while debugging shapes, kept commented out:
# if __name__ == '__main__':
#     net = AlexNet()
#     print(net)
#     input = torch.rand(128, 3, 32, 32)
#     out = net(input)
#     print(out)
#     print(out.shape)
#     for t, (x, y) in enumerate(train_data):
#         if t == 0:
#             print(y)
#             print(y.size())
#             print(x.size())
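Finally, the script trains the network but never persists it. A minimal sketch for saving and reloading the weights (the file name cifar10_cnn.pth is my own choice):

# After main() finishes:
torch.save(net.state_dict(), 'cifar10_cnn.pth')

# Later, to restore into a fresh model:
model = AlexNet()
model.load_state_dict(torch.load('cifar10_cnn.pth', map_location=device))
model.to(device).eval()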