import cv2
import numpy as np
from PIL import Image
from torch.utils.data.dataset import Dataset
from utils import merge_bboxes
# Read the annotation list and decode its first entry so the augmentation
# steps further down have an image and its boxes to work on.
path = 'xml.txt'
with open(path) as f:
    lines = f.readlines()

# The first `num_train` entries are the training split; the remaining
# int(10 %) of the entries are left out of `train_lines`.
num_val = int(len(lines) * 0.1)
num_train = len(lines) - num_val
train_lines = lines[:num_train]
train_batches = len(train_lines)  # number of training entries

image_size = [416, 416]

# An annotation entry is whitespace-separated: the image path first, then one
# comma-separated group of integers per box.
annotation_line = lines[0]
input_shape = image_size
line = annotation_line.split()
image = Image.open(line[0])
iw, ih = image.size
h, w = input_shape
# One row per box; the array has length 0 when the entry lists no boxes.
box = np.array([np.array(list(map(int, field.split(',')))) for field in line[1:]])
def rand(a=0, b=1):
    """Return a random float drawn uniformly from the interval [a, b).

    Called without arguments it yields a value in [0, 1).
    """
    return np.random.rand() * (b - a) + a
# Augmentation settings.  None of these names was defined anywhere in the
# script, so this section used to stop with a NameError.  The numeric limits
# are the ones implied by the range notes on the sampling lines below
# (0.6n + 0.7, 0.2n - 0.1, 0.5n + 1, with n uniform in [0, 1)).
# NOTE(review): `random = True` selects the augmented path; set it to False
# for the deterministic letterbox.  Confirm which default is intended.
random = True
jitter = .3
hue = .1
sat = 1.5
val = 1.5

if not random:
    # Deterministic letterbox: scale the image to fit inside (w, h) while
    # keeping its aspect ratio, then centre it on a grey canvas.
    scale = min(w / iw, h / ih)  # e.g. 416 / 1920
    nw = int(iw * scale)  # e.g. 416
    nh = int(ih * scale)  # e.g. 234
    dx = (w - nw) // 2  # e.g. 0
    dy = (h - nh) // 2  # e.g. 91
    image = image.resize((nw, nh), Image.BICUBIC)
    # Canvas of size (w, h) filled with RGB (128, 128, 128).
    new_image = Image.new('RGB', (w, h), (128, 128, 128))
    new_image.paste(image, (dx, dy))
    image_data = np.array(new_image, np.float32)

    # One all-zero row of 5 values per box; stays empty when there are no boxes.
    box_data = np.zeros((len(box), 5))
    if len(box) > 0:
        # Apply the same scale and offset to the box columns: 0 and 2 follow
        # the width, 1 and 3 follow the height.
        box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
        box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
        # box[:, 0:2] selects columns 0 and 1 of every row; the boolean mask
        # marks the negative entries, which are then set to 0.
        box[:, 0:2][box[:, 0:2] < 0] = 0
        print("box[:, 0:2]={},[box[:, 0:2] < 0]={}".format(box[:, 0:2],[box[:, 0:2] < 0]))
        # Clamp columns 2 and 3 to the canvas width and height.
        box[:, 2][box[:, 2] > w] = w
        box[:, 3][box[:, 3] > h] = h
        print("box[:, 2]={},[box[:, 2] >w]={}".format(box[:, 2], [box[:, 2] > w]))
        # Drop boxes that are no more than 1 pixel wide or high.
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        box = box[np.logical_and(box_w > 1, box_h > 1)]
        box_data = np.zeros((len(box), 5))
        box_data[:len(box)] = box
else:
    # Random aspect ratio: w / h times the quotient of two independent draws
    # from [1 - jitter, 1 + jitter).
    new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter)
    # Random scale in [0.25, 2).
    scale = rand(.25, 2)
    # The `else` was missing, so the first branch was always overwritten.
    if new_ar < 1:
        nh = int(scale * h)
        nw = int(nh * new_ar)
    else:
        nw = int(scale * w)
        nh = int(nw / new_ar)
    image = image.resize((nw, nh), Image.BICUBIC)

    # Paste the resized image at a random offset on a randomly coloured canvas.
    dx = int(rand(0, w - nw))
    dy = int(rand(0, h - nh))
    new_image = Image.new('RGB', (w, h),
                          (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
    new_image.paste(image, (dx, dy))
    image = new_image

    # Mirror horizontally with probability 0.5.
    flip = rand(0, 1) < .5
    if flip:
        image = image.transpose(Image.FLIP_LEFT_RIGHT)

    # Sample the colour jitter into separate names so the limits `hue`, `sat`
    # and `val` are not overwritten by the sampled values.
    hue_shift = rand(-hue, hue)
    sat_gain = rand(1, sat) if rand(0, 1) < .5 else 1 / rand(1, sat)
    val_gain = rand(1, val) if rand(0, 1) < .5 else 1 / rand(1, val)
    x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
    # For float32 input OpenCV reports hue in degrees (0..360), so the shift
    # has to wrap modulo 360; wrapping by 1 left out-of-range hues to be
    # clamped instead of rotated.
    x[..., 0] = (x[..., 0] + hue_shift * 360) % 360
    x[..., 1] *= sat_gain
    x[..., 2] *= val_gain
    # Saturation and value must stay within [0, 1].
    x[:, :, 1:][x[:, :, 1:] > 1] = 1
    x[x < 0] = 0
    image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB) * 255

    # One all-zero row of 5 values per box; stays empty when there are no boxes.
    box_data = np.zeros((len(box), 5))
    if len(box) > 0:
        # Move the boxes with the image: scale, offset, then mirror if flipped.
        box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
        box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
        if flip:
            box[:, [0, 2]] = w - box[:, [2, 0]]
        # Clamp to the canvas and drop boxes no more than 1 pixel wide or high.
        box[:, 0:2][box[:, 0:2] < 0] = 0
        box[:, 2][box[:, 2] > w] = w
        box[:, 3][box[:, 3] > h] = h
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        box = box[np.logical_and(box_w > 1, box_h > 1)]
        box_data = np.zeros((len(box), 5))
        box_data[:len(box)] = box
def rand(a=0, b=1):
    """Return a random float drawn uniformly from the interval [a, b).

    Called without arguments it yields a value in [0, 1).
    """
    return np.random.rand() * (b - a) + a
# Mosaic augmentation: four annotated images are each scaled, colour-jittered
# and pasted onto their own (w, h) canvas, then one quadrant of every canvas
# is combined into a single image.
h, w = input_shape

# Colour-jitter limits.  They were never defined in the script; the values are
# the ones implied by the range notes on the sampling lines
# (0.2n - 0.1 and 0.5n + 1, with n uniform in [0, 1)).
hue = .1
sat = 1.5
val = 1.5

min_offset_x = 0.3
min_offset_y = 0.3
scale_low = 1 - min(min_offset_x, min_offset_y)  # 0.7
scale_high = scale_low + 0.2  # 0.9

image_datas = []
box_datas = []

# Paste offsets of the four images, in order: top-left, bottom-left,
# bottom-right, top-right (matching the quadrant assembly below).
place_x = [0, 0, int(w * min_offset_x), int(w * min_offset_x)]
place_y = [0, int(h * min_offset_y), int(h * min_offset_y), 0]

# The mosaic needs four annotation entries.  Iterating over `annotation_line`
# (a single string) walked over its characters instead.
mosaic_lines = lines[:4]
if len(mosaic_lines) < 4:
    raise ValueError(
        "mosaic needs 4 annotation lines, got {}".format(len(mosaic_lines)))

for line, dx, dy in zip(mosaic_lines, place_x, place_y):
    # Entry layout: image path, then one comma-separated integer group per box.
    line_content = line.split()
    image = Image.open(line_content[0])
    image = image.convert("RGB")
    iw, ih = image.size
    box = np.array([np.array(list(map(int, field.split(',')))) for field in line_content[1:]])

    # Mirror the image and its x coordinates with probability 0.5
    # (only when the entry has boxes).
    flip = rand(0, 1) < .5
    if flip and len(box) > 0:
        image = image.transpose(Image.FLIP_LEFT_RIGHT)
        box[:, [0, 2]] = iw - box[:, [2, 0]]

    # Resize to a random scale in [scale_low, scale_high) of the canvas.
    new_ar = w / h  # 1 for a square canvas
    scale = rand(scale_low, scale_high)
    # The `else` was missing, so the first branch was always overwritten.
    if new_ar < 1:
        nh = int(scale * h)
        nw = int(nh * new_ar)
    else:
        nw = int(scale * w)
        nh = int(nw / new_ar)
    image = image.resize((nw, nh), Image.BICUBIC)

    # Colour jitter.  The sampled values get their own names so the limits
    # `hue`, `sat` and `val` stay the same for every one of the four images.
    hue_shift = rand(-hue, hue)
    sat_gain = rand(1, sat) if rand(0, 1) < .5 else 1 / rand(1, sat)
    val_gain = rand(1, val) if rand(0, 1) < .5 else 1 / rand(1, val)
    x = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
    # For float32 input OpenCV reports hue in degrees (0..360), so the shift
    # has to wrap modulo 360 rather than 1.
    x[..., 0] = (x[..., 0] + hue_shift * 360) % 360
    x[..., 1] *= sat_gain
    x[..., 2] *= val_gain
    # Saturation and value must stay within [0, 1].
    x[:, :, 1:][x[:, :, 1:] > 1] = 1
    x[x < 0] = 0
    image = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)  # back to RGB
    image = Image.fromarray((image * 255).astype(np.uint8))

    # Paste the image at this quadrant's offset on a randomly coloured canvas.
    new_image = Image.new('RGB', (w, h),
                          (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
    new_image.paste(image, (dx, dy))
    image_data = np.array(new_image)

    # Move the boxes with the image, clamp them to the canvas and drop boxes
    # that are no more than 1 pixel wide or high.
    box_data = []
    if len(box) > 0:
        box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
        box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
        box[:, 0:2][box[:, 0:2] < 0] = 0
        box[:, 2][box[:, 2] > w] = w
        box[:, 3][box[:, 3] > h] = h
        box_w = box[:, 2] - box[:, 0]
        box_h = box[:, 3] - box[:, 1]
        box = box[np.logical_and(box_w > 1, box_h > 1)]
        box_data = np.zeros((len(box), 5))
        box_data[:len(box)] = box

    # Collect the per-image results; without these appends the assembly below
    # indexed into empty lists.
    image_datas.append(image_data)
    box_datas.append(box_data)

# Random cut position, e.g. within [124, 291) for a 416-pixel canvas.
cutx = np.random.randint(int(w * min_offset_x), int(w * (1 - min_offset_x)))
cuty = np.random.randint(int(h * min_offset_y), int(h * (1 - min_offset_y)))

# Stitch the four quadrants together around (cutx, cuty).
new_image = np.zeros([h, w, 3])
new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

# Final box adjustment for the cut lines is delegated to utils.merge_bboxes.
new_boxes = np.array(merge_bboxes(box_datas, cutx, cuty))