First, here is the code that runs on the CPU (the line numbers are referenced by the GPU changes below):
 1 import torch
 2 from torchvision import transforms
 3 from torchvision import datasets
 4 from torch.utils.data import DataLoader
 5 import torch.nn.functional as F
 6 import torch.optim as optim
 7
 8 batch_size = 64
 9 transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
10 train_dataset = datasets.MNIST(root='../dataset/mnist/', train=True, download=True, transform=transform)
11 train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
12 test_dataset = datasets.MNIST(root='../dataset/mnist/', train=False, download=True, transform=transform)
13 test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)
14
15
16 class Net(torch.nn.Module):
17     def __init__(self):
18         super(Net, self).__init__()
19         self.l1 = torch.nn.Linear(784, 512)
20         self.l2 = torch.nn.Linear(512, 256)
21         self.l3 = torch.nn.Linear(256, 128)
22         self.l4 = torch.nn.Linear(128, 64)
23         self.l5 = torch.nn.Linear(64, 10)
24
25     def forward(self, x):
26         x = x.view(-1, 784)
27         x = F.relu(self.l1(x))
28         x = F.relu(self.l2(x))
29         x = F.relu(self.l3(x))
30         x = F.relu(self.l4(x))
31         return self.l5(x)
32
33
34 model = Net()
35
36 criterion = torch.nn.CrossEntropyLoss()
37 optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
38
39
40 def train(epoch):
41     running_loss = 0.0
42     for batch_idx, data in enumerate(train_loader, 0):
43         inputs, target = data
44         optimizer.zero_grad()
45         # forward + backward + update
46         outputs = model(inputs)
47         loss = criterion(outputs, target)
48         loss.backward()
49         optimizer.step()
50         running_loss += loss.item()
51         if batch_idx % 300 == 299:
52             print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
53             running_loss = 0.0
54
55
56 def test():
57     correct = 0
58     total = 0
59     with torch.no_grad():
60         for data in test_loader:
61             images, labels = data
62             outputs = model(images)
63             _, predicted = torch.max(outputs.data, dim=1)
64             total += labels.size(0)
65             correct += (predicted == labels).sum().item()
66     print('Accuracy on test set: %d %%' % (100 * correct / total))
67
68
69 if __name__ == '__main__':
70     for epoch in range(10):
71         train(epoch)
72         test()
To run this on the GPU, the model and the relevant tensors have to be moved onto the GPU.
What needs to be moved: the model, the data (inputs and labels), and the criterion (the loss function). Strictly speaking, a plain CrossEntropyLoss carries no tensors, so moving it is optional; it just keeps the pattern uniform.
The optimizer does not need to be moved: it only holds references to the model's parameters, and model.to(device) moves those parameter tensors in place, so the references stay valid (see the sketch below).
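A minimal sketch of this (my addition, not from the original post): under PyTorch's default conversion behavior, nn.Module.to() moves the parameter tensors in place, so the Parameter objects the optimizer already references are the same ones that end up on the GPU. The tiny Linear model here is just a throwaway example:

import torch

model = torch.nn.Linear(4, 2)                       # throwaway model, not the MNIST Net
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model.to(device)                                    # moves the parameters in place

p = next(model.parameters())
print(p.device)                                     # cuda:0 when a GPU is available
print(p is optimizer.param_groups[0]['params'][0])  # True: the optimizer still points at the same Parameter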
First, define the device:
device = torch.device('cuda:0')
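If the script may also be run on a machine without a GPU, a common variation (a sketch of a standard pattern, not part of the original snippet) is to fall back to the CPU automatically:

# pick the GPU when available, otherwise stay on the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')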
Then move the model and the criterion onto the device with .to(device):
# cuda
model = model.to(device)
criterion = criterion.to(device)
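One note from my side, not in the original post: the plain CrossEntropyLoss used here holds no tensors, so criterion.to(device) is effectively a no-op; it only matters for losses that do carry tensors, such as per-class weights. A hedged sketch:

# Sanity check: the model's parameters should now report the GPU device.
print(next(model.parameters()).device)              # expected: cuda:0

# A loss that carries a tensor, e.g. hypothetical per-class weights, is worth moving,
# since .to(device) moves the stored weight tensor as well:
class_weights = torch.ones(10)                      # hypothetical weights, one per digit class
weighted_criterion = torch.nn.CrossEntropyLoss(weight=class_weights).to(device)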
Next, move inputs and target (line 43) and outputs (line 46) onto the device so that training runs on the GPU (strictly, outputs already lives on the GPU once the model does, so that extra line is optional):
def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        # cuda: move the batch to the device
        inputs = inputs.to(device)  ###
        target = target.to(device)  ###
        optimizer.zero_grad()  # clear the gradients before each step

        # forward + backward + update
        outputs = model(inputs)
        outputs = outputs.to(device)  ### optional: outputs is already on the device because the model is
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0
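A common optional speed-up for the host-to-GPU copies (my addition, a sketch of a standard PyTorch option rather than part of the original code): allocate the batches in page-locked memory with pin_memory=True and pass non_blocking=True to .to(), so the transfer can overlap with computation. The name pinned_loader is mine; it assumes the train_dataset, batch_size and device defined above:

pinned_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size,
                           pin_memory=True)             # page-locked host buffers
inputs, target = next(iter(pinned_loader))
inputs = inputs.to(device, non_blocking=True)           # asynchronous copy from pinned memory
target = target.to(device, non_blocking=True)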
Do the same in test(): move images and labels (line 61) and outputs (line 62) onto the device so that evaluation also runs on the GPU:
def test():
    correct = 0
    total = 0
    with torch.no_grad():  # no gradient tracking is needed during evaluation
        for data in test_loader:
            images, label = data
            # cuda: move the batch to the device
            images = images.to(device)  ###
            label = label.to(device)    ###
            outputs = model(images)
            outputs = outputs.to(device)  ### optional, see above
            _, predicted = torch.max(outputs.data, dim=1)  # index of the largest logit = predicted class
            total += label.size(0)
            # label is a 1-D tensor of length N (one class index per image), so label.size(0) is the batch size
            correct += (predicted == label).sum().item()
    print('Accuracy on test set: %d %%' % (100 * correct / total))
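For reference, an equivalent evaluation helper written a bit more idiomatically (my own sketch and naming, not the author's code): argmax replaces torch.max(outputs.data, dim=1), and model.eval() is called even though this particular network has no dropout or batch-norm layers:

def accuracy(model, loader, device):
    # Hedged alternative to test(); returns the accuracy as a percentage.
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for images, label in loader:
            images, label = images.to(device), label.to(device)
            predicted = model(images).argmax(dim=1)   # class with the largest logit
            correct += (predicted == label).sum().item()
            total += label.size(0)
    return 100.0 * correct / total

# e.g.: print('Accuracy on test set: %.2f %%' % accuracy(model, test_loader, device))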
The full code (lines modified or added for CUDA are marked with ### at the end of the line):
import torch as t
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.optim as op
import torch.nn.functional as f

use_gpu = t.cuda.is_available()                    ###
device = t.device('cuda:0' if use_gpu else 'cpu')  ### fall back to the CPU when no GPU is present

batch_size = 64
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
# note: Compose takes a list of transforms
# MNIST mean: 0.1307, std: 0.3081
# ToTensor converts the PIL image to a tensor with values in [0, 1]; Normalize then standardizes it

train_dataset = datasets.MNIST(root='../dataset/mnist', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root='../dataset/mnist', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


class Net(t.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.linear1 = t.nn.Linear(784, 512)
        self.linear2 = t.nn.Linear(512, 256)
        self.linear3 = t.nn.Linear(256, 128)
        self.linear4 = t.nn.Linear(128, 64)
        self.linear5 = t.nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 784)  # flatten each 1x28x28 image into a 784-dim vector; -1 infers the batch size
        x = f.relu(self.linear1(x))
        x = f.relu(self.linear2(x))
        x = f.relu(self.linear3(x))
        x = f.relu(self.linear4(x))
        return self.linear5(x)


model = Net()  # instantiate the network (note the parentheses)
criterion = t.nn.CrossEntropyLoss()
optimizer = op.SGD(model.parameters(), lr=0.01, momentum=0.5)

# cuda
model = model.to(device)          ###
criterion = criterion.to(device)  ###


def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        # cuda: move the batch to the device
        inputs = inputs.to(device)  ###
        target = target.to(device)  ###
        optimizer.zero_grad()  # clear the gradients before each step

        # forward + backward + update
        outputs = model(inputs)
        outputs = outputs.to(device)  ### optional: outputs is already on the device because the model is
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0


def test():
    correct = 0
    total = 0
    with t.no_grad():  # no gradient tracking is needed during evaluation
        for data in test_loader:
            images, label = data
            # cuda: move the batch to the device
            images = images.to(device)  ###
            label = label.to(device)    ###
            outputs = model(images)
            outputs = outputs.to(device)  ### optional, see above
            _, predicted = t.max(outputs.data, dim=1)  # index of the largest logit = predicted class
            total += label.size(0)
            # label is a 1-D tensor of length N (one class index per image), so label.size(0) is the batch size
            correct += (predicted == label).sum().item()
    print('Accuracy on test set: %d %%' % (100 * correct / total))


if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        test()
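As a quick smoke test that everything actually landed on the intended device (my own addition, not part of the original script), a dummy forward pass can be run before training:

# Hedged check using the same t alias and model as the full script above.
dummy = t.randn(2, 1, 28, 28).to(device)    # two fake 28x28 grayscale images
with t.no_grad():
    out = model(dummy)
print(next(model.parameters()).device, out.device, out.shape)
# expected with a GPU present: cuda:0 cuda:0 torch.Size([2, 10])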