1、ResNet
2、ResNext
2020-12-18
3、ResNest
4、数据增强
其他的一些具体方法:
数据中心化
数据标准化
缩放
裁剪
旋转
翻转
填充
噪声添加
灰度变换
线性变换
仿射变换
亮度、饱和度及对比度变换
1. 裁剪
- 中心裁剪:transforms.CenterCrop(512); # 一个参数,宽高相等
- 随机裁剪:transforms.RandomCrop;
- 随机大小、长宽比裁剪:transforms.RandomResizedCrop;
- 四个角及中心裁剪(共得到5张图):transforms.FiveCrop;
- 四个角及中心裁剪,再加上它们翻转后的结果(共得到10张图): transforms.TenCrop。
2. 翻转和旋转
- 依概率p水平翻转:transforms.RandomHorizontalFlip(p=0.5);
- 依概率p垂直翻转:transforms.RandomVerticalFlip(p=0.5);
- 随机旋转:transforms.RandomRotation(degrees, resample=False, expand=False, center=None)。
- degrees:旋转角度,当为一个数a时,在(-a,a)之间随机旋转
- resample:重采样方法
- expand:旋转时是否保持图片完整,只针对中心旋转
- center:设置旋转中心点
3. 随机遮挡
- 对图像进行随机遮挡: transforms.RandomErasing。
4. 图像变换
- 尺寸变换:transforms.Resize;
- 标准化:transforms.Normalize;
- 填充:transforms.Pad;
- 修改亮度、对比度和饱和度:transforms.ColorJitter;
- 转灰度图:transforms.Grayscale;
- 依概率p转为灰度图:transforms.RandomGrayscale;
- 线性变换:transforms.LinearTransformation();
- 仿射变换:transforms.RandomAffine;
- 将数据转换为PILImage:transforms.ToPILImage;
- 转为tensor,并归一化至[0-1]:transforms.ToTensor;
- 用户自定义方法:transforms.Lambda。
5. 对transforms的选择操作,使数据增强更灵活
- transforms.RandomChoice(transforms列表):从给定的一系列transforms中选一个进行操作;
- transforms.RandomApply(transforms列表, p=0.5):给一个transform加上概率,依概率进行选择操作;
- transforms.RandomOrder(transforms列表):将transforms中的操作随机打乱。
还有几个自定义的数据增强:
import numpy as np
from PIL import ImageDraw, Image
import matplotlib.pyplot as plt


def randomErasing(img, p=0.5, sl=0.02, sh=0.4, r1=0.3, r2=3):
    """Random erasing: overwrite one random rectangle with uniform noise.

    Args:
        img: PIL image (anything ``np.array`` turns into an H x W or
            H x W x C array).
        p: Probability of applying the augmentation.
        sl, sh: Lower / upper bound of the erased area, as a fraction of
            the image area.
        r1, r2: Lower / upper bound of the rectangle's height/width ratio.

    Returns:
        The input object unchanged when the augmentation is skipped,
        otherwise a new PIL image with the rectangle replaced by noise.
    """
    if np.random.rand() > p:
        return img

    img = np.array(img)
    # Slice the shape instead of unpacking three values so that
    # single-channel (2-D) images work as well.
    img_h, img_w = img.shape[:2]
    img_area = img_h * img_w

    # Rejection sampling: draw size and position until the rectangle lies
    # completely inside the image.
    while True:
        mask_area = np.random.uniform(sl, sh) * img_area
        mask_aspect_ratio = np.random.uniform(r1, r2)
        mask_w = int(np.sqrt(mask_area / mask_aspect_ratio))
        mask_h = int(np.sqrt(mask_area * mask_aspect_ratio))

        left = np.random.randint(0, img_w)
        top = np.random.randint(0, img_h)
        right = left + mask_w
        bottom = top + mask_h
        if right <= img_w and bottom <= img_h:
            break

    # Build the noise patch once, after a valid rectangle was found, rather
    # than on every rejected attempt.
    mask = np.random.rand(mask_h, mask_w, *img.shape[2:]) * 255
    img[top:bottom, left:right] = mask
    return Image.fromarray(img)


def randomOcclusion(img, p=0.5, sl=0.02, sh=0.4, r1=0.3, r2=3):
    """Random occlusion: cover one random rectangle with a solid colour.

    Args:
        img: PIL image that converts to an H x W x 3 array.
        p: Probability of applying the augmentation.
        sl, sh: Lower / upper bound of the occluded area, as a fraction of
            the image area.
        r1, r2: Lower / upper bound of the rectangle's height/width ratio.

    Returns:
        The input object unchanged when the augmentation is skipped,
        otherwise a new PIL image with exactly one rectangle painted.
    """
    if np.random.rand() > p:
        return img

    img = np.array(img)
    h, w = img.shape[:2]
    image_area = h * w

    # Only the size needs rejection sampling.  It must be validated BEFORE a
    # position is drawn: np.random.randint(0, w - mask_w) raises ValueError
    # when the mask is as wide as (or wider than) the image, which the
    # default sh/r2 values can produce.
    while True:
        mask_area = np.random.uniform(sl, sh) * image_area
        aspect_ratio = np.random.uniform(r1, r2)
        mask_h = int(np.sqrt(mask_area * aspect_ratio))
        mask_w = int(np.sqrt(mask_area / aspect_ratio))
        if mask_w < w and mask_h < h:
            break

    x0 = np.random.randint(0, w - mask_w)
    y0 = np.random.randint(0, h - mask_h)
    x1 = x0 + mask_w
    y1 = y0 + mask_h
    # Fill colour picked from http://www.yuangongju.com/color
    img[y0:y1, x0:x1, :] = (0, 244, 255)
    return Image.fromarray(img)


def randomMirror(img, boxes, p=0.5):
    """Horizontal flip of an image together with its polygon annotation.

    Args:
        img: PIL image.
        boxes: Flat coordinate sequence ``[x0, y0, x1, y1, ...]``.
        p: Probability of applying the flip.

    Returns:
        ``(image, labels)`` in every case, so callers can always unpack two
        values.  When the flip is skipped the original image and a copy of
        ``boxes`` are returned.
    """
    if np.random.rand() > p:
        return img, list(boxes)

    img = np.array(img)
    width = img.shape[1]

    # Mirror the pixel columns; make the result contiguous before handing
    # it back to PIL.
    img = np.ascontiguousarray(img[:, ::-1])

    # x coordinates sit at even indices and are mirrored around the image
    # centre; y coordinates (odd indices) are unchanged.
    labels = [width - 1 - value if index % 2 == 0 else value
              for index, value in enumerate(boxes)]

    return Image.fromarray(img), labels


if __name__ == '__main__':
    img = Image.open(r'E:\wangpeng\code\models\pytorch\test1.jpg')
    polygon = [300, 100, 100, 100, 100, 50, 300, 50]

    # Alternative augmentations to try:
    # Img = randomErasing(img, p=0.5, sl=0.02, sh=0.4, r1=0.3, r2=3)
    # Img = randomOcclusion(img, p=0.8, sl=0.02, sh=0.4, r1=0.3, r2=3)
    Img, labels = randomMirror(img, boxes=polygon, p=0.8)

    draw = ImageDraw.Draw(Img)
    draw.polygon(labels, outline='gold')
    Img.show()
做的一个小任务,对6类场景进行分类:具体内容也会发到github 上
对比结果如下:
对比了只用resnet50、只用resnext50_32x4d、resnext50_32x4d加数据增强三种方案,数据来自这个Kaggle比赛:https://www.kaggle.com/puneet6060/intel-image-classification。感觉
我用的cutout,里面还有mixup,代码如下:
1 from __future__ import print_function, division 2 3 import torch 4 import torch.nn as nn 5 import torch.optim as optim 6 from torch.optim import lr_scheduler 7 from torch.autograd import Variable 8 from torchvision import models, transforms 9 import numpy as np 10 import time 11 import os 12 from torch.utils.data import Dataset 13 14 from PIL import Image 15 16 from models import resnext 17 18 # use PIL Image to read image 19 def default_loader(path): 20 try: 21 img = Image.open(path) 22 return img.convert('RGB') 23 except: 24 print("Cannot read image: {}".format(path)) 25 26 # define your Dataset. Assume each line in your .txt file is [name/tab/label], for example:0001.jpg 1 27 class Data(Dataset): 28 def __init__(self, img_path, txt_path, dataset = '', data_transforms=None, loader = default_loader): 29 with open(txt_path) as input_file: 30 lines = input_file.readlines() 31 self.img_name =A= [os.path.join(img_path, line.strip().split(' ')[0]) for line in lines] 32 self.img_label =B= [int(line.strip('\n').split(' ')[-1]) for line in lines] 33 34 35 self.data_transforms = data_transforms 36 self.dataset = dataset 37 self.loader = loader 38 39 def __len__(self): 40 return len(self.img_name) 41 42 def __getitem__(self, item): 43 img_name = self.img_name[item] 44 label = self.img_label[item] 45 img = self.loader(img_name) 46 47 if self.data_transforms is not None: 48 try: 49 img = self.data_transforms[self.dataset](img) 50 except: 51 print("Cannot transform image: {}".format(img_name)) 52 return img, label 53 54 def train_model(model, criterion, optimizer, scheduler, num_epochs, use_gpu): 55 since = time.time() 56 57 best_model_wts = model.state_dict() 58 best_acc = 0.0 59 count_batch = 0 60 61 for epoch in range(num_epochs): 62 begin_time = time.time() 63 print('Epoch {}/{}'.format(epoch, num_epochs - 1)) 64 print('-' * 10) 65 66 # Each epoch has a training and validation phase 67 for phase in ['train', 'test']: 68 69 if phase == 'train': 70 scheduler.step() 71 
model.train(True) # Set model to training mode 72 else: 73 model.train(False) # Set model to evaluate mode 74 75 running_loss = 0.0 76 running_corrects = 0.0 77 78 # Iterate over data. 79 for data in dataloders[phase]: 80 count_batch += 1 81 # get the inputs 82 inputs, labels = data 83 84 # wrap them in Variable 85 if use_gpu: 86 inputs = Variable(inputs.cuda()) 87 labels = Variable(labels.cuda()) 88 else: 89 inputs, labels = Variable(inputs), Variable(labels) 90 91 # zero the parameter gradients 92 optimizer.zero_grad() 93 94 # forward 95 outputs = model(inputs) 96 _, preds = torch.max(outputs.data, 1) 97 loss = criterion(outputs, labels) 98 99 # backward + optimize only if in training phase 100 if phase == 'train': 101 loss.backward() 102 optimizer.step() 103 # statistics 104 # running_loss += loss.data[0] 105 running_loss += loss.item() 106 running_corrects += torch.sum(preds == labels.data).to(torch.float32) 107 108 # print result every 10 batch 109 if count_batch%10 == 0: 110 batch_loss = running_loss / (batch_size*count_batch) 111 batch_acc = running_corrects / (batch_size*count_batch) 112 print('{} Epoch [{}] Batch [{}] Loss: {:.4f} Acc: {:.4f} Time: {:.4f}s'. 
\ 113 format(phase, epoch, count_batch, batch_loss, batch_acc, time.time()-begin_time)) 114 begin_time = time.time() 115 116 epoch_loss = running_loss / dataset_sizes[phase] 117 epoch_acc = running_corrects / dataset_sizes[phase] 118 print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc)) 119 120 # save model 121 if phase == 'train': 122 if not os.path.exists('output'): 123 os.makedirs('output') 124 torch.save(model, 'output/resnet_epoch{}.pkl'.format(epoch)) 125 126 # deep copy the model 127 if phase == 'test' and epoch_acc > best_acc: 128 best_acc = epoch_acc 129 best_model_wts = model.state_dict() 130 131 time_elapsed = time.time() - since 132 print('Training complete in {:.0f}m {:.0f}s'.format( 133 time_elapsed // 60, time_elapsed % 60)) 134 print('Best test Acc: {:4f}'.format(best_acc)) 135 136 # load best model weights 137 model.load_state_dict(best_model_wts) 138 return model 139 140 141 ########################################################################################## 142 ## Cutout means 143 class Cutout(object): 144 """Randomly mask out one or more patches from an image. 145 Args: 146 n_holes (int): Number of patches to cut out of each image. 147 length (int): The length (in pixels) of each square patch. 148 """ 149 def __init__(self, n_holes, length): 150 self.n_holes = n_holes 151 self.length = length 152 153 def __call__(self, img): 154 """ 155 Args: 156 img (Tensor): Tensor image of size (C, H, W). 157 Returns: 158 Tensor: Image with n_holes of dimension length x length cut out of it. 159 """ 160 h = img.size(1) 161 w = img.size(2) 162 163 mask = np.ones((h, w), np.float32) 164 165 for n in range(self.n_holes): 166 # (x,y)is the sit of centor 167 y = np.random.randint(h) 168 x = np.random.randint(w) 169 170 y1 = np.clip(y - self.length // 2, 0, h) 171 y2 = np.clip(y + self.length // 2, 0, h) 172 x1 = np.clip(x - self.length // 2, 0, w) 173 x2 = np.clip(x + self.length // 2, 0, w) 174 175 mask[y1: y2, x1: x2] = 0. 
176 177 mask = torch.from_numpy(mask) 178 mask = mask.expand_as(img) 179 img = img * mask 180 181 return img 182 ####################################################################################### 183 def mixup_data(x, y, alpha=1.0, use_cuda=True): 184 '''Returns mixed inputs, pairs of targets, and lambda''' 185 if alpha > 0: 186 lam = np.random.beta(alpha, alpha) 187 else: 188 lam = 1 189 190 batch_size = x.size()[0] 191 if use_cuda: 192 index = torch.randperm(batch_size).cuda() 193 else: 194 index = torch.randperm(batch_size) 195 196 mixed_x = lam * x + (1 - lam) * x[index, :] 197 y_a, y_b = y, y[index] 198 199 return mixed_x, y_a, y_b, lam 200 def mixup_criterion(criterion, pred, y_a, y_b, lam): 201 return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b) 202 ########################################################################################## 203 204 if __name__ == '__main__': 205 206 mean = [0.4309895, 0.4576381, 0.4534026] 207 std = [0.2699252, 0.26827288, 0.29846913] 208 209 data_transforms = { 210 'train': transforms.Compose([ 211 transforms.Resize((150, 150)), 212 transforms.RandomHorizontalFlip(), 213 transforms.ToTensor(), 214 transforms.Normalize(mean, std), 215 Cutout(n_holes=1, length=16) 216 ]), 217 'test': transforms.Compose([ 218 transforms.Resize((150, 150)), 219 transforms.ToTensor(), 220 transforms.Normalize(mean, std), 221 ]), 222 } 223 224 use_gpu = torch.cuda.is_available() 225 226 batch_size = 16 227 num_class = 6 228 229 image_datasets = {x: Data(img_path='SceneClassification', 230 txt_path=('SceneClassification/' + x + '.txt'), 231 data_transforms=data_transforms, 232 dataset=x) for x in ['train', 'test']} 233 234 # wrap your data and label into Tensor 235 dataloders = {x: torch.utils.data.DataLoader(image_datasets[x], 236 batch_size=batch_size, 237 shuffle=True) for x in ['train', 'test']} 238 239 dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']} 240 241 # get model and replace the original fc 
layer with your fc layer 242 243 ### resnet50 resnext50_32x4d 244 245 model_ft = models.resnext50_32x4d(pretrained=True) 246 247 num_ftrs = model_ft.fc.in_features 248 model_ft.fc = nn.Linear(num_ftrs, num_class) 249 250 # if use gpu 251 if use_gpu: 252 model_ft = model_ft.cuda() 253 254 # define cost function 255 criterion = nn.CrossEntropyLoss() 256 257 # Observe that all parameters are being optimized 258 optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.005, momentum=0.9) 259 260 # Decay LR by a factor of 0.2 every 5 epochs 261 exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.2) 262 263 # multi-GPU 264 model_ft = torch.nn.DataParallel(model_ft, device_ids=[0]) 265 266 # train model 267 model_ft = train_model(model=model_ft, 268 criterion=criterion, 269 optimizer=optimizer_ft, 270 scheduler=exp_lr_scheduler, 271 num_epochs=25, 272 use_gpu=use_gpu) 273 274 # save best model 275 torch.save(model_ft, "output/best_resnet.pkl")View Code
其中还写了一个计算自己数据集的mean、std的代码,一并放出:
import numpy as np
import cv2
import os


def compute_mean_std(imgs_path, img_w=150, img_h=150):
    """Compute the per-channel mean and standard deviation of an image folder.

    Every readable image directly inside ``imgs_path`` is resized to
    ``img_w`` x ``img_h`` and scaled to [0, 1].  The statistics are
    accumulated image by image, so memory use does not grow with the size
    of the dataset.

    Args:
        imgs_path: Directory containing the image files.
        img_w: Width every image is resized to.
        img_h: Height every image is resized to.

    Returns:
        ``(means, stdevs)``: two lists of three floats in RGB order.

    Raises:
        ValueError: If no file in ``imgs_path`` could be read as an image.
    """
    channel_sum = np.zeros(3, dtype=np.float64)
    channel_sq_sum = np.zeros(3, dtype=np.float64)
    pixel_count = 0

    imgs_path_list = os.listdir(imgs_path)
    len_ = len(imgs_path_list)

    for i, item in enumerate(imgs_path_list, 1):
        img = cv2.imread(os.path.join(imgs_path, item))
        if img is None:
            # cv2.imread signals failure by returning None (sub-directory,
            # non-image or corrupt file); skip instead of crashing in resize.
            print('skip unreadable file:', item)
            continue

        img = cv2.resize(img, (img_w, img_h)).astype(np.float64) / 255.
        channel_sum += img.sum(axis=(0, 1))
        channel_sq_sum += (img ** 2).sum(axis=(0, 1))
        pixel_count += img_h * img_w
        print(i, '/', len_)

    if pixel_count == 0:
        raise ValueError('no readable images in {}'.format(imgs_path))

    mean_bgr = channel_sum / pixel_count
    # Var[X] = E[X^2] - E[X]^2; clamp tiny negative rounding errors.
    var_bgr = np.maximum(channel_sq_sum / pixel_count - mean_bgr ** 2, 0.0)
    std_bgr = np.sqrt(var_bgr)

    # BGR --> RGB: OpenCV loads images as BGR and needs the swap; images
    # read with PIL would not.
    return mean_bgr[::-1].tolist(), std_bgr[::-1].tolist()


if __name__ == '__main__':
    img_h, img_w = 150, 150
    imgs_path = 'SceneClassification/train/'

    means, stdevs = compute_mean_std(imgs_path, img_w=img_w, img_h=img_h)

    print("normMean = {}".format(means))
    print("normStd = {}".format(stdevs))