MNIST手写数字识别:卷积神经网络

代码

import torch
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms #从torchvision中引入图像转换

# Mini-batch size shared by the training and test data loaders.
batch_size = 64

# Preprocessing applied to every image when it is read from the dataset.
transform = transforms.Compose([
    # Converts a PIL image or (H, W, C) ndarray into a (C, H, W) float tensor
    # with values scaled to [0, 1].
    transforms.ToTensor(),
    # Standardises every pixel with (x - mean) / std. MNIST images are
    # single-channel, hence the 1-tuples. For inputs in [0, 1] the result
    # lies in roughly [-0.42, 2.82]; it is not confined to (-1, 1).
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Both splits are stored under ./mnist and downloaded if not already present;
# the same transform is applied to each.
train_dataset = datasets.MNIST(root="./mnist", train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root="./mnist", train=False, download=True, transform=transform)

# Training batches are reshuffled on every pass; test batches keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# 定义网络模型
class Net(torch.nn.Module):
    """Small CNN classifier: two conv -> max-pool -> ReLU stages, then one linear layer.

    The linear layer takes 320 features per sample. That figure matches
    1x28x28 inputs (the MNIST image size): stage one shrinks 28 -> 24 (5x5
    conv, no padding) -> 12 (2x2 pool), stage two shrinks 12 -> 8 -> 4,
    leaving 20 channels * 4 * 4 = 320 values.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # 1 -> 10 channels, 5x5 kernel, no padding, no bias term.
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5, padding=0, bias=False)
        # 10 -> 20 channels, 5x5 kernel, library defaults otherwise.
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        # One 2x2 max-pooling layer, reused after both convolutions.
        self.pooling = nn.MaxPool2d(kernel_size=2)
        # Maps the 320 flattened features to 10 class scores.
        self.fc = nn.Linear(320, 10)

    def forward(self, x):
        """Return the raw class scores, one row of 10 values per sample in ``x``."""
        # Dimension 0 of the input is the number of samples in the batch.
        num_samples = x.size(0)
        # Each stage: convolution, then pooling, then ReLU.
        stage1 = F.relu(self.pooling(self.conv1(x)))
        stage2 = F.relu(self.pooling(self.conv2(stage1)))
        # Flatten everything except the batch dimension for the linear layer.
        flat = stage2.view(num_samples, -1)
        return self.fc(flat)
    
# Instantiate the network. The class defined in this file is named ``Net``;
# the previous ``Model()`` call referenced a name that does not exist and
# raised NameError.
model = Net()

# Multi-class cross-entropy loss, applied to the network's raw class scores.
criterion = torch.nn.CrossEntropyLoss()
# SGD over all model parameters: learning rate 0.01, momentum 0.5.
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# 定义训练过程
def train(epoch):
    """Run one pass over ``train_loader``, updating ``model`` after every batch.

    Uses the module-level ``model``, ``criterion`` and ``optimizer``. After
    every 300 batches the mean loss over those 300 batches is printed and the
    accumulator is reset.

    Args:
        epoch: Zero-based epoch index; printed as ``epoch + 1``.
    """
    running_loss = 0.0
    for batch_idx, (inputs, labels) in enumerate(train_loader):
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Only report on every 300th batch (counted from 1).
        if (batch_idx + 1) % 300 != 0:
            continue
        print(f' [Epoch:{epoch+1},Btach_idx:{batch_idx+1}],loss:{running_loss / 300:.3f} ')
        running_loss = 0.0
        
# 定义测试过程
def test():
    """Evaluate ``model`` on ``test_loader`` and print the accuracy in percent.

    Uses the module-level ``model`` and ``test_loader``. The model is switched
    to evaluation mode for the duration of the loop and put back into its
    previous mode afterwards.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no samples.
    """
    # Number of correctly classified samples seen so far.
    correct = 0
    # Number of samples seen so far.
    total = 0
    # Remember the current mode so evaluation does not leave the model changed.
    was_training = model.training
    model.eval()
    with torch.no_grad():  # no gradients are needed for evaluation
        for images, labels in test_loader:
            outputs = model(images)
            # Each row of ``outputs`` holds 10 class scores; the index of the
            # largest score along dim 1 is the predicted class.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    model.train(was_training)
    # Label spelling fixed ("testdatset" -> "testdataset") to match the
    # documented program output.
    print(f' Accuracy on testdataset:{100 * (correct/total):.2f}% ')

# 开始运行
if __name__ == '__main__':
    # Ten passes over the training set, evaluating on the test set after each.
    num_epochs = 10
    for epoch in range(num_epochs):
        train(epoch)
        test()

运行效果

 [Epoch:1,Btach_idx:300],loss:0.627 
 [Epoch:1,Btach_idx:600],loss:0.190 
 [Epoch:1,Btach_idx:900],loss:0.143 
 Accuracy on testdataset:96.71% 
 [Epoch:2,Btach_idx:300],loss:0.115 
 [Epoch:2,Btach_idx:600],loss:0.097 
 [Epoch:2,Btach_idx:900],loss:0.086 
 Accuracy on testdataset:97.69% 
 [Epoch:3,Btach_idx:300],loss:0.080 
 [Epoch:3,Btach_idx:600],loss:0.073 
 [Epoch:3,Btach_idx:900],loss:0.069 
 Accuracy on testdataset:97.86% 
 [Epoch:4,Btach_idx:300],loss:0.062 
 [Epoch:4,Btach_idx:600],loss:0.064 
 [Epoch:4,Btach_idx:900],loss:0.061 
 Accuracy on testdataset:98.44% 
 [Epoch:5,Btach_idx:300],loss:0.052 
 [Epoch:5,Btach_idx:600],loss:0.051 
 [Epoch:5,Btach_idx:900],loss:0.059 
 Accuracy on testdataset:98.50% 
 [Epoch:6,Btach_idx:300],loss:0.049 
 [Epoch:6,Btach_idx:600],loss:0.048 
 [Epoch:6,Btach_idx:900],loss:0.050 
 Accuracy on testdataset:98.45% 
 [Epoch:7,Btach_idx:300],loss:0.047 
 [Epoch:7,Btach_idx:600],loss:0.041 
 [Epoch:7,Btach_idx:900],loss:0.045 
 Accuracy on testdataset:98.36% 
 [Epoch:8,Btach_idx:300],loss:0.040 
 [Epoch:8,Btach_idx:600],loss:0.042 
 [Epoch:8,Btach_idx:900],loss:0.041 
 Accuracy on testdataset:98.73% 
 [Epoch:9,Btach_idx:300],loss:0.032 
 [Epoch:9,Btach_idx:600],loss:0.041 
 [Epoch:9,Btach_idx:900],loss:0.038 
 Accuracy on testdataset:98.57% 
 [Epoch:10,Btach_idx:300],loss:0.033 
 [Epoch:10,Btach_idx:600],loss:0.035 
 [Epoch:10,Btach_idx:900],loss:0.036 
 Accuracy on testdataset:98.59% 

补充

class Net(torch.nn.Module):
    """Small CNN classifier: two conv -> max-pool -> ReLU stages, then one linear layer.

    The linear layer takes 320 features per sample. That figure matches
    1x28x28 inputs (the MNIST image size): stage one shrinks 28 -> 24 (5x5
    conv, no padding) -> 12 (2x2 pool), stage two shrinks 12 -> 8 -> 4,
    leaving 20 channels * 4 * 4 = 320 values.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # 1 -> 10 channels, 5x5 kernel, no padding, no bias term.
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5, padding=0, bias=False)
        # 10 -> 20 channels, 5x5 kernel, library defaults otherwise.
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        # One 2x2 max-pooling layer, reused after both convolutions.
        self.pooling = nn.MaxPool2d(kernel_size=2)
        # Maps the 320 flattened features to 10 class scores.
        self.fc = nn.Linear(320, 10)

    def forward(self, x):
        """Return the raw class scores, one row of 10 values per sample in ``x``."""
        # Dimension 0 of the input is the number of samples in the batch.
        num_samples = x.size(0)
        # Each stage: convolution, then pooling, then ReLU.
        stage1 = F.relu(self.pooling(self.conv1(x)))
        stage2 = F.relu(self.pooling(self.conv2(stage1)))
        # Flatten everything except the batch dimension for the linear layer.
        flat = stage2.view(num_samples, -1)
        return self.fc(flat)

Q:self.fc = torch.nn.Linear(320, 10)中的320在不通过手算推理的前提下如何得知?

A:先随便填一个数字并运行代码,PyTorch 会在全连接层处报形状不匹配的错误;报错信息中会给出展平后输入张量的形状(例如 64x320),其中第二个数(此处为 320)就是 FC 层真实的输入维数,把它填回 Linear 的第一个参数即可。

MNIST手写数字识别:卷积神经网络
MNIST手写数字识别:卷积神经网络

上一篇:【pytorch】(断点)继续上次训练


下一篇:Pytorch 学习笔记-多分类网络的搭建(1)