关于YOLOV4 dataloader的详细理解

讲解之前
dataloader程序通过对图片进行不同的调整(缩放、平移、翻转、色域变换等)来扩充数据量,同时对与之对应的目标框做同样的调整。

这里引用了[Bubbliiiing](https://blog.csdn.net/weixin_44791964?spm=1001.2014.3001.5509)关于YOLOV4讲解的一部分,并进行了更改。YOLOV4

get_random_data


import cv2
import numpy as np
from PIL import Image
from torch.utils.data.dataset import Dataset

from utils import merge_bboxes


# Settings used by the rest of the walk-through.
image_size = [416,416]
input_shape = image_size  # unpacked below as (height, width)
jitter = .3
random = False  # False -> plain letterbox resize, no random augmentation

# Read every annotation line, then shuffle with a fixed seed so the order is
# reproducible; the generator is re-seeded afterwards so later draws vary.
path = 'xml.txt'
with open(path) as f:
    lines = list(f)
np.random.seed(10101)
np.random.shuffle(lines)
np.random.seed(None)

# Hold out 10% of the lines for validation; the rest is the training split.
num_val = int(len(lines)*0.1)
num_train = len(lines) - num_val
train_lines = lines[:num_train]
train_batches = len(train_lines)

# Work on the first annotation line: the first whitespace-separated token is
# the image path, every following token is one comma-separated integer box.
annotation_line = lines[0]
line = annotation_line.split()
print("lines[0]为{},line为{}".format(lines[0],line))

# Load the image (uncomment image.show() to preview it).
image = Image.open(line[0])
#image.show()
iw, ih = image.size
h = input_shape[0]
w = input_shape[1]
# Example values from the original post: iw=1920, ih=1080, w=416, h=416.
print("iw:{},ih:{},w:{},h:{}".format(iw,ih,w,h))

# One row per box; presumably x1, y1, x2, y2, class index (the later code
# copies the rows into a 5-column array) -- TODO confirm the column order.
box = np.array([np.array([int(value) for value in entry.split(',')]) for entry in line[1:]])
print(box)
#在random为FALSE时进行
def rand(a, b):
    """Return a random float drawn uniformly between ``a`` and ``b``.

    ``np.random.rand()`` yields a uniform sample in [0, 1) (it is not a
    normally distributed value); that sample is stretched by ``b - a`` and
    shifted by ``a``.
    """
    unit_sample = np.random.rand()
    span = b - a
    return unit_sample * span + a

if not random:
    # Letterbox path (no random augmentation): shrink the image so it fits
    # inside the (w, h) canvas while keeping its aspect ratio, centre it on a
    # grey background and map the boxes into canvas coordinates.
    #
    # The results are kept in branch-specific names (`letterbox_image`,
    # `letterbox_box`) instead of being written back to `image` / `box`.
    # This script has no `return` after the branch, so the random-augmentation
    # code further down always runs too and rescales `image` / `box` by
    # nw / iw and nh / ih, i.e. relative to the ORIGINAL size. It therefore
    # has to see the untouched inputs.
    scale = min(w / iw, h / ih)  # 416 / 1920 in the 1920x1080 example
    nw = int(iw * scale)  # 416 in the example
    nh = int(ih * scale)  # 234 in the example
    dx = (w - nw) // 2  # 0 in the example
    dy = (h - nh) // 2  # 91 in the example
    # Resize with bicubic interpolation.
    letterbox_image = image.resize((nw, nh), Image.BICUBIC)
    # Grey (128, 128, 128) canvas of the target size.
    new_image = Image.new('RGB', (w, h), (128, 128, 128))
    # Paste the resized image at the centring offset.
    new_image.paste(letterbox_image, (dx, dy))
    #new_image.show()
    image_data = np.array(new_image, np.float32)
    # All-zero array with one 5-column row per box.
    box_data = np.zeros((len(box), 5))
    print("box_data",box_data)
    if len(box) > 0:
        # Work on a copy so the original annotation boxes stay intact.
        letterbox_box = box.copy()
        # Shuffle the boxes along the row axis.
        np.random.shuffle(letterbox_box)
        print(letterbox_box)
        # Map the box corners from original-image to canvas coordinates.
        letterbox_box[:, [0, 2]] = letterbox_box[:, [0, 2]] * nw / iw + dx
        letterbox_box[:, [1, 3]] = letterbox_box[:, [1, 3]] * nh / ih + dy
        # Boolean-mask assignment: clamp columns 0 and 1 at 0 ...
        letterbox_box[:, 0:2][letterbox_box[:, 0:2] < 0] = 0
        print("box[:, 0:2]={},[box[:, 0:2] < 0]={}".format(letterbox_box[:, 0:2],[letterbox_box[:, 0:2] < 0]))
        # ... and columns 2 and 3 at the canvas width / height.
        letterbox_box[:, 2][letterbox_box[:, 2] > w] = w
        letterbox_box[:, 3][letterbox_box[:, 3] > h] = h
        print("box[:, 2]={},[box[:, 2] >w]={}".format(letterbox_box[:, 2], [letterbox_box[:, 2] > w]))
        box_w = letterbox_box[:, 2] - letterbox_box[:, 0]
        box_h = letterbox_box[:, 3] - letterbox_box[:, 1]
        # Keep only boxes that are wider and taller than 1 pixel on the canvas.
        letterbox_box = letterbox_box[np.logical_and(box_w > 1, box_h > 1)]
        box_data = np.zeros((len(letterbox_box), 5))
        box_data[:len(letterbox_box)] = letterbox_box
        print(box_data)

# Random-augmentation walk-through: aspect-ratio / scale jitter, random
# placement, optional horizontal flip, HSV colour jitter and the matching
# box update.
# NOTE: the box update at the end rescales by nw / iw and nh / ih, so `image`
# and `box` are expected to still be in original-image coordinates here.
# rand(a, b) draws uniformly between a and b. The two draws below are
# independent, so the aspect ratio w / h is distorted by their quotient.
new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter)
print(new_ar)
# Size of the resized image relative to the canvas, drawn between 0.25 and 2.
scale = rand(.25, 2)
if new_ar < 1:
    nh = int(scale * h)
    nw = int(nh * new_ar)
else:
    nw = int(scale * w)
    nh = int(nw / new_ar)
image = image.resize((nw, nh), Image.BICUBIC)
#image.show()
# Size after the random rescale.
print(image.size)

# Place the resized image at a random offset on a randomly coloured canvas
# (the offset range is negative when the image is larger than the canvas).
dx = int(rand(0, w - nw))
dy = int(rand(0, h - nh))
# np.random.randint excludes the upper bound, so each channel is in 0..254.
new_image = Image.new('RGB', (w, h),(np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
new_image.paste(image, (dx, dy))
image = new_image
#image.show()
# Mirror the canvas horizontally with probability 0.5.
flip = rand(0,1) < .5
if flip:
    image = image.transpose(Image.FLIP_LEFT_RIGHT)
# Colour jitter bounds.
hue=.1
sat=1.5
val=1.5
# Hue shift drawn from [-0.1, 0.1) (as a fraction of the full hue circle).
hue = rand(-hue, hue)
# Saturation / value gain: a factor in [1, 1.5) or its reciprocal, 50/50.
sat = rand(1, sat) if rand(0,1) < .5 else 1 / rand(1, sat)
val = rand(1, val) if rand(0,1) < .5 else 1 / rand(1, val)
# RGB (scaled to 0..1) -> HSV.
x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
# For float32 input OpenCV returns H in degrees (0..360) and S, V in 0..1,
# so the shifted hue has to wrap around at 360 rather than at 1.
x[..., 0] += hue * 360
x[..., 0][x[..., 0] > 360] -= 360
x[..., 0][x[..., 0] < 0] += 360
# Scale saturation and value, then clamp every channel to its valid range.
x[..., 1] *= sat
x[..., 2] *= val
x[x[:, :, 0] > 360, 0] = 360
x[:, :, 1:][x[:, :, 1:] > 1] = 1
x[x < 0] = 0
# Back to RGB, rescaled to 0..255.
image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255
# Apply the same geometric transform to the boxes.
box_data = np.zeros((len(box), 5))
if len(box) > 0:
    np.random.shuffle(box)
    # Scale to the resized image and shift by the paste offset.
    box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
    box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
    if flip:
        # Mirror x and swap the two x columns so column 0 stays the smaller one.
        box[:, [0, 2]] = w - box[:, [2, 0]]
    # Clamp to the canvas.
    box[:, 0:2][box[:, 0:2] < 0] = 0
    box[:, 2][box[:, 2] > w] = w
    box[:, 3][box[:, 3] > h] = h
    box_w = box[:, 2] - box[:, 0]
    box_h = box[:, 3] - box[:, 1]
    # Keep only boxes that are wider and taller than 1 pixel.
    box = box[np.logical_and(box_w > 1, box_h > 1)]
    box_data = np.zeros((len(box), 5))
    box_data[:len(box)] = box

同样地,get_random_data_with_Mosaic 的处理过程与上述过程类似:

# Bounds for the colour jitter: hue shift, saturation gain, value gain.
hue, sat, val = .1, 1.5, 1.5


def rand(a, b):
    """Return a random float drawn uniformly between ``a`` and ``b``.

    Built from ``np.random.rand()``, which samples uniformly from [0, 1).
    """
    return a + (b - a) * np.random.rand()
# Mosaic set-up: canvas size, scale range and paste offsets of the four tiles.
random = False
h = input_shape[0]
w = input_shape[1]
# Example values from the original post: iw=1920, ih=1080, w=416, h=416.
min_offset_x = min_offset_y = 0.3
# Each tile is scaled by a factor between scale_low (0.7) and scale_high
# (about 0.9) of the canvas size.
scale_low = 1 - min(min_offset_x, min_offset_y)
scale_high = scale_low + 0.2

# Per-tile results collected by the loop below, plus the running tile index.
image_datas, box_datas = [], []
index = 0

# Paste offsets of the four tiles; for w == 416 place_x is [0, 0, 124, 124].
place_x = [0] * 2 + [int(w * min_offset_x)] * 2
print(place_x)
place_y = [0] + [int(h * min_offset_y)] * 2 + [0]
# Build the four mosaic tiles. Mosaic needs one annotation line per tile.
# Earlier in this file `annotation_line` is a single string, and iterating a
# string yields characters rather than annotation lines, so in that case the
# first four entries of `lines` are used instead.
if isinstance(annotation_line, str):
    mosaic_lines = lines[:4]
else:
    mosaic_lines = annotation_line
if len(mosaic_lines) != 4:
    raise ValueError(
        "mosaic needs exactly 4 annotation lines, got {}".format(len(mosaic_lines)))
for line in mosaic_lines:
    # Split the line into the image path and the box entries.
    line_content = line.split()
    # Open the image and force it to RGB.
    image = Image.open(line_content[0])
    image = image.convert("RGB")
    iw, ih = image.size
    # All boxes of this image, one row of integers per box.
    box = np.array([np.array(list(map(int, box.split(',')))) for box in line_content[1:]])
    # Mirror horizontally with probability 0.5 (only when there are boxes).
    flip = rand(0,1) < .5
    if flip and len(box) > 0:
        image = image.transpose(Image.FLIP_LEFT_RIGHT)
        # Mirror x and swap the two x columns so column 0 stays the smaller one.
        box[:, [0, 2]] = iw - box[:, [2, 0]]

    # Resize the tile; the aspect ratio is that of the canvas (1 for 416x416).
    new_ar = w / h
    scale = rand(scale_low, scale_high)
    if new_ar < 1:
        nh = int(scale * h)
        nw = int(nh * new_ar)
    else:
        nw = int(scale * w)
        nh = int(nw / new_ar)
    image = image.resize((nw, nh), Image.BICUBIC)

    # Colour jitter. The draws go into per-tile names so that the bounds
    # `hue`, `sat` and `val` stay the same for every tile; overwriting them
    # would make each tile's range depend on the previous tile's draw.
    tile_hue = rand(-hue, hue)
    tile_sat = rand(1, sat) if rand(0,1) < .5 else 1 / rand(1, sat)
    tile_val = rand(1, val) if rand(0,1) < .5 else 1 / rand(1, val)
    # RGB (scaled to 0..1) -> HSV.
    x = cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV)
    # For float32 input OpenCV returns H in degrees (0..360) and S, V in
    # 0..1, so the shifted hue has to wrap around at 360 rather than at 1.
    x[..., 0] += tile_hue*360
    x[..., 0][x[..., 0]>360] -= 360
    x[..., 0][x[..., 0]<0] += 360
    x[..., 1] *= tile_sat
    x[..., 2] *= tile_val
    # Clamp every channel to its valid range.
    x[x[:,:, 0]>360, 0] = 360
    x[:, :, 1:][x[:, :, 1:]>1] = 1
    x[x<0] = 0
    image = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)  # back to RGB, values in 0..1

    image = Image.fromarray((image * 255).astype(np.uint8))
    # Paste the tile at the offset reserved for this tile index on a randomly
    # coloured canvas (np.random.randint excludes 255).
    dx = place_x[index]
    dy = place_y[index]
    new_image = Image.new('RGB', (w, h),(np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
    new_image.paste(image, (dx, dy))
    image_data = np.array(new_image)

    index = index + 1
    box_data = []
    # Map the boxes into canvas coordinates and drop degenerate ones.
    if len(box) > 0:
        np.random.shuffle(box)
        box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
        box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
        box[:, 0:2][box[:, 0:2] < 0] = 0
        box[:, 2][box[:, 2] > w] = w
        box[:, 3][box[:, 3] > h] = h
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        box = box[np.logical_and(box_w > 1, box_h > 1)]
        box_data = np.zeros((len(box), 5))
        box_data[:len(box)] = box

    image_datas.append(image_data)
    box_datas.append(box_data)

# Pick the random cut point of the mosaic; for a 416x416 canvas each
# coordinate is drawn from 124..290 (the upper bound 291 is excluded).
cutx = np.random.randint(int(w * min_offset_x), int(w * (1 - min_offset_x)))
cuty = np.random.randint(int(h * min_offset_y), int(h * (1 - min_offset_y)))

# Stitch the four tiles together: tile 0 -> top-left, tile 1 -> bottom-left,
# tile 2 -> bottom-right, tile 3 -> top-right.
new_image = np.zeros([h, w, 3])
top, bottom = slice(None, cuty), slice(cuty, None)
left, right = slice(None, cutx), slice(cutx, None)
quadrants = ((top, left), (bottom, left), (bottom, right), (top, right))
for tile_idx, (rows, cols) in enumerate(quadrants):
    new_image[rows, cols, :] = image_datas[tile_idx][rows, cols, :]

# Combine the per-tile boxes for the stitched image (see merge_bboxes in utils).
new_boxes = np.array(merge_bboxes(box_datas, cutx, cuty))
上一篇:对拍


下一篇:重拾C语言 对简单猜字游戏利用随机数生成器编写