深度学习对数据集的预处理
因为在使用神经网络的时候常常采用尺寸相同的图片数据集,但是我们下载来的数据集往往尺寸不一定相同,所以我们应该转化为相同尺寸的数据集。笔者首先考虑过用cv2.resize()把图片变为等尺寸的,再用torch.from_numpy()转化成tensor来处理,但是resize改变了图片的比例,所以在神经网络中拟合出的结果可能不是我们所希望的。
所以我们采用以下的方法:
- 首先设置一个图片的目标尺寸
- 把图片以最短边按比例缩小
- 然后随机剪裁为目标尺寸
代码环境:python3.7.4,pytorch1.4.0,jupyter notebook
#!/usr/bin/env python# coding: utf-8# In[1]:from __future__ import print_function,divisionimport osimport torchimport pandas as pdfrom skimage import io,transformimport numpy as npimport matplotlib.pyplot as pltfrom torch.utils.data import Dataset,DataLoaderfrom torchvision import transforms,utilsimport warnings# In[2]:warnings.filterwarnings('ignore')# In[3]:plt.ion()# In[4]:landmarks_frame=pd.read_csv('data/faces/face_landmarks.csv')# https://download.pytorch.org/tutorial/faces.zip 数据集下载地址# 把数据放在data文件夹下n=65img_name=landmarks_frame.iloc[n,0]landmarks=landmarks_frame.iloc[n,1:]landmarks=np.asarray(landmarks)landmarks=landmarks.astype('float').reshape(-1,2)# 转化成n行2列的形式print('Image name: {}'.format(img_name))print('Landmarks shape: {}'.format(landmarks.shape))print('First 4 Landmarks: {}'.format(landmarks[:4]))# In[5]:def show_landmarks(image, landmarks):plt.imshow(image)plt.scatter(landmarks[:,0],landmarks[:,1],s=10,marker='.',c='red')plt.pause(0.001)# In[6]:plt.figure()show_landmarks(io.imread(os.path.join('data/faces/', img_name)), landmarks)plt.show()# In[7]:class FaceLandmarksDataset(Dataset):def __init__(self,csv_file,root_dir,transform=None):''' :param csv_file: 带注释带csv文件路径 :param root_dir: 所有图像的目录 :param transform: (可选)在一个样本上转换 '''self.landmarks_frame=pd.read_csv(csv_file)self.root_dir=root_dir self.transform=transform def __len__(self):return len(self.landmarks_frame)def __getitem__(self, idx):if torch.is_tensor(idx):idx=idx.tolist() # 将张量作为(嵌套的)列表返回img_name=os.path.join(self.root_dir, self.landmarks_frame.iloc[idx,0]) # 图片地址image=io.imread(img_name)landmarks=self.landmarks_frame.iloc[idx,1:] # 图片的标记点landmarks=np.array([landmarks])landmarks=landmarks.astype('float').reshape(-1,2)sample={'image':image,'landmarks':landmarks}if self.transform:sample=self.transform(sample) # 转置return sample # In[8]:face_dataset=FaceLandmarksDataset(csv_file='data/faces/face_landmarks.csv', root_dir='data/faces/')fig=plt.figure()for i in 
range(len(face_dataset)):sample=face_dataset[i]print(i,sample['image'].shape,sample['landmarks'].shape)ax=plt.subplot(1,4,i+1)plt.tight_layout()ax.set_title('sample #{}'.format(i))ax.axis('off')show_landmarks(**sample) # dict输入if i==3: # 展示前4组图片plt.show()break# In[9]:class Rescale(object):""" 把图片缩放为相同的大小 如果为元组,则输出与output_size匹配。 如果为int,则将较小的图像边缘与output_size匹配,并保持宽高比相同。 参数: output_size:输出大小 """def __init__(self, output_size):assert isinstance(output_size,(int,tuple))self.output_size=output_size def __call__(self, sample):image, landmarks = sample['image'], sample['landmarks']h, w = image.shape[:2]if isinstance(self.output_size, int): # 如果是整型,将较小的图像边缘与output_size匹配,并保持宽高比相同if h > w:new_h,new_w=self.output_size*h/w,self.output_size else:new_h,new_w=self.output_size,self.output_size*w/helse:new_h, new_w = self.output_size new_h, new_w = int(new_h),int(new_w)img = transform.resize(image, (new_h, new_w))# h and w are swapped for landmarks because for images,# x and y axes are axis 1 and 0 respectivelylandmarks = landmarks*[new_w / w, new_h / h] # 同时把标记按比例缩小return {'image': img, 'landmarks': landmarks}# In[10]:class RandomCrop(object):""" 随机裁剪图片 Args: output_size (tuple or int):期望的输入如果是整形则裁剪成正方形 """def __init__(self, output_size):assert isinstance(output_size, (int, tuple))if isinstance(output_size, int):self.output_size = (output_size, output_size)else:assert len(output_size) == 2self.output_size = output_size def __call__(self, sample):image, landmarks = sample['image'], sample['landmarks']h, w = image.shape[:2]new_h, new_w = self.output_size top = np.random.randint(0, h - new_h) # 在0到h-new_h之间产生随机数left = np.random.randint(0, w - new_w)image = image[top: top + new_h, left: left + new_w] # 随机剪裁的范围landmarks = landmarks - [left, top] return {'image': image, 'landmarks': landmarks}# In[11]:class ToTensor(object):""" 把darray转成tensor """def __call__(self, sample):image, landmarks = sample['image'], sample['landmarks']# numpy image: H x W x C# torch image: C X H X Wimage = 
image.transpose((2, 0, 1)) # 把numpy的格式转化成tensorreturn {'image': torch.from_numpy(image),'landmarks': torch.from_numpy(landmarks)}# In[12]:scale = Rescale(256)crop = RandomCrop(128)composed = transforms.Compose([Rescale(256), RandomCrop(224)])# 在每一个样本图片上应用fig = plt.figure()sample = face_dataset[65]for i, tsfrm in enumerate([scale, crop, composed]):transformed_sample = tsfrm(sample)ax = plt.subplot(1, 3, i + 1)plt.tight_layout()ax.set_title(type(tsfrm).__name__)show_landmarks(**transformed_sample)plt.show()# In[13]:transformed_dataset=FaceLandmarksDataset(csv_file='data/faces/face_landmarks.csv', root_dir='data/faces/', transform=transforms.Compose([Rescale(256), RandomCrop(224), ToTensor()]))for i in range(len(transformed_dataset)):sample=transformed_dataset[i]print(i,sample['image'].size(),sample['landmarks'].size())if i==3:break# In[14]:dataloader = DataLoader(transformed_dataset,batch_size=4,shuffle=True, num_workers=4) # 用4个进程来加载数据每个批次4个并洗牌# In[15]:def show_landmarks_batch(sample_batched):# 在一组图片中使用标记展示图片images_batch,landmarks_batch=sample_batched['image'],sample_batched['landmarks']batch_size=len(images_batch)im_size=images_batch.size(2)grid_border_size=2grid=utils.make_grid(images_batch)plt.imshow(grid.numpy().transpose((1,2,0)))for i in range(batch_size):plt.scatter(landmarks_batch[i,:,0].numpy()+i*im_size+(i+1)*grid_border_size,landmarks_batch[i,:,1].numpy()+grid_border_size,s=10,marker='.',c='red')plt.title('Batch from dataloader')# In[16]:for i_batch, sample_batched in enumerate(dataloader):print(i_batch,sample_batched['image'].size(),sample_batched['landmarks'].size())if i_batch==3:plt.figure()show_landmarks_batch(sample_batched)plt.axis('off')plt.ioff()plt.show()break