讲解之前
dataloader 程序对图片进行不同的调整以扩充数据量,同时对与之对应的目标框做相应的调整。
这里引用了 [Bubbliiiing](https://blog.csdn.net/weixin_44791964?spm=1001.2014.3001.5509) 关于 YOLOV4 的讲解的一部分,并进行了更改。YOLOV4
get_random_data
import cv2
import numpy as np
from PIL import Image
from torch.utils.data.dataset import Dataset
from utils import merge_bboxes
# Annotation list: each line is split on whitespace further down; the first
# token is opened as an image and every remaining token is parsed as a group
# of comma-separated integers (one token per target box).
path = 'xml.txt'
with open(path) as f:
    lines = list(f)

# Shuffle with a fixed seed so the order is repeatable, then re-seed without
# an argument so later random draws are no longer tied to that seed.
np.random.seed(10101)
np.random.shuffle(lines)
np.random.seed(None)

# 10% of the entries are counted as validation; the rest form the training set.
num_val = int(0.1 * len(lines))
num_train = len(lines) - num_val
train_lines = lines[:num_train]
train_batches = len(train_lines)
image_size = [416, 416]

# Inputs of the walkthrough: a single annotation line plus the settings that
# the augmentation code below reads.
annotation_line = lines[0]
input_shape = image_size
jitter = .3
random = False

# Break the chosen annotation line into its whitespace-separated fields.
line = annotation_line.split()
print("lines[0]为{},line为{}".format(lines[0], line))

# Load the image named by the first field.
image = Image.open(line[0])
# image.show()  # uncomment to preview the source image
iw, ih = image.size
h, w = input_shape
# Example values from the original notes: iw:1920, ih:1080, w:416, h:416
print("iw:{},ih:{},w:{},h:{}".format(iw, ih, w, h))

# One integer row per remaining field.
# NOTE(review): presumably x1,y1,x2,y2,class_index — confirm against the
# annotation generator.
box = np.array([np.array([int(v) for v in field.split(',')]) for field in line[1:]])
print(box)
#在random为FALSE时进行
def rand(a=0, b=1):
    """Return one sample drawn uniformly from the interval between a and b.

    ``np.random.rand()`` yields a uniformly distributed value n in [0, 1)
    (it is not a normal distribution), which is mapped to ``n * (b - a) + a``.
    The result therefore starts at ``a`` and approaches ``b``; when ``b < a``
    the range simply runs downwards from ``a``.

    Args:
        a: Value returned when the underlying draw is 0. Defaults to 0.
        b: Bound approached as the underlying draw approaches 1. Defaults to 1.

    Returns:
        A float between ``a`` and ``b``.
    """
    return np.random.rand() * (b - a) + a
if not random:
    # Letterbox resize: use the smaller of the two ratios so the whole image
    # fits inside the w x h canvas (e.g. 1920x1080 -> 416x234 for 416x416).
    scale = min(w / iw, h / ih)
    nw, nh = int(iw * scale), int(ih * scale)
    # Offsets that centre the resized image on the canvas.
    dx, dy = (w - nw) // 2, (h - nh) // 2

    # Resize with bicubic interpolation and paste onto a grey (128) canvas.
    image = image.resize((nw, nh), Image.BICUBIC)
    new_image = Image.new('RGB', (w, h), (128, 128, 128))
    new_image.paste(image, (dx, dy))
    # new_image.show()  # uncomment to preview the letterboxed image
    image_data = np.array(new_image, np.float32)

    # Default result: one all-zero row of 5 values per parsed box.
    box_data = np.zeros((len(box), 5))
    print("box_data",box_data)
    if len(box):
        # Shuffle the rows (boxes) in place.
        np.random.shuffle(box)
        print(box)
        # Map x (columns 0, 2) and y (columns 1, 3) from source-image pixels
        # into canvas pixels: scale, then shift by the paste offset.
        box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
        box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy

        # Clamp the first corner to >= 0. The slice is a view, so writing
        # through it updates `box` itself.
        top_left = box[:, 0:2]
        top_left[top_left < 0] = 0
        print("box[:, 0:2]={},[box[:, 0:2] < 0]={}".format(box[:, 0:2],[box[:, 0:2] < 0]))

        # Clamp the second corner to the canvas width / height.
        right_edge = box[:, 2]
        right_edge[right_edge > w] = w
        bottom_edge = box[:, 3]
        bottom_edge[bottom_edge > h] = h
        print("box[:, 2]={},[box[:, 2] >w]={}".format(box[:, 2], [box[:, 2] > w]))

        # Keep only boxes that are still wider and taller than one pixel.
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        box = box[(box_w > 1) & (box_h > 1)]
        box_data = np.zeros((len(box), 5))
        box_data[:len(box)] = box
    print(box_data)
# Random-augmentation branch. It is the counterpart of the `if not random:`
# letterbox branch and must not run after it: that branch already resizes
# `image` and rewrites `box` in place, so running both would transform the
# image and the boxes twice.
if random:
    # rand() draws uniformly (n in [0, 1)), so each factor below is
    # 0.6n + 0.7 with an independent n: a randomly distorted aspect ratio.
    new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter)
    print(new_ar)
    # Random overall scale, 1.75n + 0.25.
    scale = rand(.25, 2)
    if new_ar < 1:
        nh = int(scale * h)
        nw = int(nh * new_ar)
    else:
        nw = int(scale * w)
        nh = int(nw / new_ar)
    image = image.resize((nw, nh), Image.BICUBIC)
    # image.show()  # uncomment to preview the rescaled image
    print(image.size)

    # Paste at a random offset between 0 and (canvas size - image size) on a
    # canvas filled with a random colour; the offset is negative when the
    # resized image is larger than the canvas.
    dx = int(rand(0, w - nw))
    dy = int(rand(0, h - nh))
    new_image = Image.new('RGB', (w, h),(np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
    new_image.paste(image, (dx, dy))
    image = new_image
    # image.show()  # uncomment to preview the pasted image

    # Mirror the image horizontally with probability 0.5.
    flip = rand(0,1) < .5
    if flip:
        image = image.transpose(Image.FLIP_LEFT_RIGHT)

    # Colour jitter ranges.
    hue = .1
    sat = 1.5
    val = 1.5
    # Hue shift as a fraction of the full circle: 0.2n - 0.1.
    hue = rand(-hue, hue)
    # Saturation / value gain: 1 + 0.5n, or its reciprocal, chosen at random.
    sat = rand(1, sat) if rand(0,1) < .5 else 1 / rand(1, sat)
    val = rand(1, val) if rand(0,1) < .5 else 1 / rand(1, val)

    # RGB -> HSV on a float32 image scaled to [0, 1]. For float32 input OpenCV
    # reports H in degrees (0..360) and S, V in 0..1.
    x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
    x[..., 0] += hue * 360
    # Hue is circular with a period of 360 degrees, so wrap values that left
    # the range (wrapping at 1 would only nudge almost every pixel by 1 degree).
    x[..., 0][x[..., 0] > 360] -= 360
    x[..., 0][x[..., 0] < 0] += 360
    # Scale saturation and value, then clamp every channel to its valid range.
    x[..., 1] *= sat
    x[..., 2] *= val
    x[x[:, :, 0] > 360, 0] = 360
    x[:, :, 1:][x[:, :, 1:] > 1] = 1
    x[x < 0] = 0
    # Back to RGB in the 0..255 range.
    image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

    # Apply the same geometric transform to the boxes.
    box_data = np.zeros((len(box), 5))
    if len(box) > 0:
        np.random.shuffle(box)
        box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
        box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
        if flip:
            # Mirroring swaps the two x coordinates of each box.
            box[:, [0, 2]] = w - box[:, [2, 0]]
        # Clamp to the canvas and drop boxes no larger than one pixel.
        box[:, 0:2][box[:, 0:2] < 0] = 0
        box[:, 2][box[:, 2] > w] = w
        box[:, 3][box[:, 3] > h] = h
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        box = box[np.logical_and(box_w > 1, box_h > 1)]
        box_data = np.zeros((len(box), 5))
        box_data[:len(box)] = box
同样地,get_random_data_with_Mosaic 的处理过程与上述过程类似。
# Colour-jitter ranges for the mosaic walkthrough: maximum hue shift (as a
# fraction of the hue circle) and maximum saturation / value gain.
hue, sat, val = .1, 1.5, 1.5
def rand(a=0, b=1):
    """Return one sample drawn uniformly from the interval between a and b.

    ``np.random.rand()`` yields a uniformly distributed value n in [0, 1),
    which is mapped to ``n * (b - a) + a``. The result therefore starts at
    ``a`` and approaches ``b``.

    Args:
        a: Value returned when the underlying draw is 0. Defaults to 0.
        b: Bound approached as the underlying draw approaches 1. Defaults to 1.

    Returns:
        A float between ``a`` and ``b``.
    """
    return np.random.rand() * (b - a) + a
random = False
h, w = input_shape
# Example from the original notes: w:416, h:416

# Fraction of the canvas by which the shifted tiles are offset.
min_offset_x = min_offset_y = 0.3
# Each tile is scaled by a random factor between scale_low (0.7) and
# scale_high (about 0.9).
scale_low = 1 - min(min_offset_x, min_offset_y)
scale_high = scale_low + 0.2

# Per-tile results collected by the loop that follows.
image_datas, box_datas = [], []
index = 0

# Paste offsets of the four tiles, in the order they are processed:
# (0, 0), (0, oy), (ox, oy), (ox, 0). For w = 416 this gives [0, 0, 124, 124].
place_x = [0, 0] + [int(w * min_offset_x)] * 2
print(place_x)
place_y = [0] + [int(h * min_offset_y)] * 2 + [0]
# Build the four mosaic tiles. `annotation_line` above is a single string, so
# iterating it would walk over characters; a mosaic needs four annotation
# lines (there are four paste positions and four tiles are stitched later),
# so the first four entries of `lines` are used.
for line in lines[:4]:
    # Split the annotation line into image path and box fields.
    line_content = line.split()
    # Open the image and force three RGB channels.
    image = Image.open(line_content[0])
    image = image.convert("RGB")
    iw, ih = image.size
    # All target boxes of this image, one integer row per field.
    box = np.array([np.array(list(map(int, box.split(',')))) for box in line_content[1:]])

    # Mirror horizontally with probability 0.5 (only when boxes exist),
    # swapping the two x coordinates of every box accordingly.
    flip = rand(0,1) < .5
    if flip and len(box) > 0:
        image = image.transpose(Image.FLIP_LEFT_RIGHT)
        box[:, [0, 2]] = iw - box[:, [2, 0]]

    # Resize the tile to the canvas aspect ratio at a random scale,
    # 0.2n + 0.7 with n uniform in [0, 1).
    new_ar = w / h
    scale = rand(scale_low, scale_high)
    if new_ar < 1:
        nh = int(scale * h)
        nw = int(nh * new_ar)
    else:
        nw = int(scale * w)
        nh = int(nw / new_ar)
    image = image.resize((nw, nh), Image.BICUBIC)

    # Colour jitter. The draws go into per-image names so that the ranges
    # `hue`, `sat`, `val` stay intact for the next image; overwriting them
    # would derive each image's range from the previous image's random draw.
    hue_shift = rand(-hue, hue)
    sat_gain = rand(1, sat) if rand(0,1) < .5 else 1 / rand(1, sat)
    val_gain = rand(1, val) if rand(0,1) < .5 else 1 / rand(1, val)
    # RGB -> HSV on a float32 image scaled to [0, 1]. For float32 input OpenCV
    # reports H in degrees (0..360) and S, V in 0..1.
    x = cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV)
    x[..., 0] += hue_shift*360
    # Hue is circular with a period of 360 degrees; wrap out-of-range values.
    x[..., 0][x[..., 0]>360] -= 360
    x[..., 0][x[..., 0]<0] += 360
    x[..., 1] *= sat_gain
    x[..., 2] *= val_gain
    # Clamp every channel to its valid range.
    x[x[:,:, 0]>360, 0] = 360
    x[:, :, 1:][x[:, :, 1:]>1] = 1
    x[x<0] = 0
    # Back to an 8-bit RGB PIL image.
    image = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)
    image = Image.fromarray((image * 255).astype(np.uint8))

    # Paste the tile at the offset reserved for its position in the mosaic,
    # on a canvas filled with a random colour.
    dx = place_x[index]
    dy = place_y[index]
    new_image = Image.new('RGB', (w, h),(np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
    new_image.paste(image, (dx, dy))
    image_data = np.array(new_image)
    index = index + 1

    # Map the boxes into canvas coordinates, clamp them to the canvas and drop
    # boxes that are no larger than one pixel.
    box_data = []
    if len(box) > 0:
        np.random.shuffle(box)
        box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
        box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
        box[:, 0:2][box[:, 0:2] < 0] = 0
        box[:, 2][box[:, 2] > w] = w
        box[:, 3][box[:, 3] > h] = h
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        box = box[np.logical_and(box_w > 1, box_h > 1)]
        box_data = np.zeros((len(box), 5))
        box_data[:len(box)] = box

    image_datas.append(image_data)
    box_datas.append(box_data)
# Pick the random cut position inside the central band of the canvas
# (between 124 and 291 for a 416 x 416 canvas with a 0.3 offset).
cutx = np.random.randint(int(w * min_offset_x), int(w * (1 - min_offset_x)))
cuty = np.random.randint(int(h * min_offset_y), int(h * (1 - min_offset_y)))

# Region of the canvas taken from each tile, as (rows, columns), in tile order:
# top-left, bottom-left, bottom-right, top-right.
quadrants = (
    (slice(None, cuty), slice(None, cutx)),
    (slice(cuty, None), slice(None, cutx)),
    (slice(cuty, None), slice(cutx, None)),
    (slice(None, cuty), slice(cutx, None)),
)
new_image = np.zeros([h, w, 3])
for tile_index, (rows, cols) in enumerate(quadrants):
    new_image[rows, cols, :] = image_datas[tile_index][rows, cols, :]

# Post-process the per-tile boxes with utils.merge_bboxes.
# NOTE(review): presumably it clips each tile's boxes to its quadrant —
# confirm in utils.
new_boxes = np.array(merge_bboxes(box_datas, cutx, cuty))