'''
Build ground-truth targets from the information in the txt label files:
create the real bounding boxes, apply some data augmentation, and finally
output a 7*7*30 tensor per image.
'''
import torch
import cv2
import os
import os.path
import random
import numpy as np
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor
from PIL import Image
CLASS_NUM = 20  # number of object classes (PASCAL VOC)
class yoloDataset(Dataset):
    '''
    YOLOv1 dataset. Inherits Dataset, so it must implement
    __getitem__ and __len__.

    Each line of ``list_file`` is:
        <filename> x1 y1 x2 y2 class [x1 y1 x2 y2 class ...]
    i.e. five values per ground-truth box after the file name.
    '''
    image_size = 448  # network input resolution

    def __init__(self, img_root, list_file, train, transform):
        """
        :param img_root: directory prefix that image file names are appended to
        :param list_file: txt annotation file (format described on the class)
        :param train: when True, __getitem__ applies data augmentation
        :param transform: iterable of callables applied to the image at the end
        """
        self.root = img_root
        self.train = train
        self.transform = transform
        self.fnames = []
        self.boxes = []
        self.labels = []
        self.S = 7          # grid size
        self.B = 2          # boxes per grid cell
        self.C = CLASS_NUM  # number of classes
        self.mean = (123, 117, 104)  # per-channel mean subtracted in __getitem__
        # BUGFIX: use a context manager so the annotation file is always
        # closed (the original opened it and never closed the handle).
        with open(list_file) as file_txt:
            lines = file_txt.readlines()
        for line in lines:
            splited = line.strip().split()
            if not splited:
                # robustness: skip blank lines instead of crashing on splited[0]
                continue
            self.fnames.append(splited[0])
            num_boxes = (len(splited) - 1) // 5
            box = []
            label = []
            for i in range(num_boxes):
                x = float(splited[1 + 5 * i])
                y = float(splited[2 + 5 * i])
                x2 = float(splited[3 + 5 * i])
                y2 = float(splited[4 + 5 * i])
                c = splited[5 + 5 * i]
                box.append([x, y, x2, y2])
                label.append(int(c))
            self.boxes.append(torch.Tensor(box))
            self.labels.append(torch.LongTensor(label))
        self.num_samples = len(self.boxes)
def __getitem__(self, idx):
'''
传入id后
1.数据增强:自己实现的 随机翻转,随机放缩,randomBlur,RandomBrightness,randomShift
2.对标注的框的坐标进行/ (w,h)进行归一化
3.pytorch预训练使用RGB,图片默认时BGR,所以将BGR2RGB
4.将图片减去均值进行归一化
5.将图片resize乘指定大小,此时时448
6.将图片标签编码到7x7*30的向量
:param idx:
:return:
'''
fname = self.fnames[idx]
img = cv2.imread(os.path.join(self.root + fname))
boxes = self.boxes[idx].clone()
labels = self.labels[idx].clone()
if self.train:
img, boxes = self.random_flip(img, boxes)
img, boxes = self.randomScale(img, boxes)
img = self.randomBlur(img)
img = self.RandomBrightness(img)
img, boxes, labels = self.randomShift(img, boxes, labels)
h, w, _ = img.shape
boxes /= torch.Tensor([w, h, w, h]).expand_as(boxes)
img = self.BGR2RGB(img)
img = self.subMean(img, self.mean)
img = cv2.resize(img, (self.image_size, self.image_size))
target = self.encoder(boxes, labels)
for t in self.transform:
img = t(img)
return img, target
def __len__(self):
return self.num_samples
def encoder(self,boxes,labels):
'''
输入:
输出: 返回target, target是 (网格x,网格y,第一个框x1,y1,w1,h1,第二个框的x2,y2,w2,h2) ,目标框都是归一化后的。
:param boxes: 归一化后的边界框 宽和高应该是 1,1
:param labels: ground truth (7x7)
:return:
'''
grid_num =7
target = torch.zeros((grid_num,grid_num,int(CLASS_NUM + 2 * 5)))
cell_size = 1. /grid_num
wh = boxes[:,2:] - boxes[:,:2]
print(f'wh={wh}')
cxcy = (boxes[:,2:] + boxes[:,:2]) / 2
for i in range(cxcy.size()[0]):
cxcy_sample = cxcy[i]
for i in range(cxcy.size()[0]):
cxcy_sample = cxcy[i]
ij = (cxcy_sample / cell_size).ceil() - 1
target[int(ij[1]), int(ij[0]), 4] = 1
target[int(ij[1]), int(ij[0]), 9] = 1
target[int(ij[1]), int(ij[0]), int(labels[i]) + 10] = 1
xy = ij * cell_size
delta_xy = (cxcy_sample - xy) / cell_size
target[int(ij[1]), int(ij[0]), 2:4] = wh[i]
target[int(ij[1]), int(ij[0]), :2] = delta_xy
target[int(ij[1]), int(ij[0]), 7:9] = wh[i]
target[int(ij[1]), int(ij[0]), 5:7] = delta_xy
return target
def BGR2RGB(self, img):
return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
def BGR2HSV(self, img):
return cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
def HSV2BGR(self, img):
return cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
def RandomBrightness(self, bgr):
if random.random() < 0.5:
hsv = self.BGR2HSV(bgr)
h, s, v = cv2.split(hsv)
adjust = random.choice([0.5, 1.5])
v = v * adjust
v = np.clip(v, 0, 255).astype(hsv.dtype)
hsv = cv2.merge((h, s, v))
bgr = self.HSV2BGR(hsv)
return bgr
def RandomSaturation(self, bgr):
if random.random() < 0.5:
hsv = self.BGR2HSV(bgr)
h, s, v = cv2.split(hsv)
adjust = random.choice([0.5, 1.5])
s = s * adjust
s = np.clip(s, 0, 255).astype(hsv.dtype)
hsv = cv2.merge((h, s, v))
bgr = self.HSV2BGR(hsv)
return bgr
def RandomHue(self, bgr):
if random.random() < 0.5:
hsv = self.BGR2HSV(bgr)
h, s, v = cv2.split(hsv)
adjust = random.choice([0.5, 1.5])
h = h * adjust
h = np.clip(h, 0, 255).astype(hsv.dtype)
hsv = cv2.merge((h, s, v))
bgr = self.HSV2BGR(hsv)
return bgr
def randomBlur(self, bgr):
if random.random() < 0.5:
bgr = cv2.blur(bgr, (5, 5))
return bgr
    def randomShift(self, bgr, boxes, labels):
        """With probability 0.5, translate the image by up to +/-20% in x and y.

        Vacated pixels are filled with the constant BGR color (104, 117, 123).
        Boxes whose centers land outside the image after the shift are dropped;
        if no box survives, the original (un-shifted) inputs are returned.

        :param bgr: HxWxC image as a numpy array (BGR assumed — TODO confirm)
        :param boxes: (n, 4) tensor of [x1, y1, x2, y2] in pixel coordinates
        :param labels: (n,) tensor of class indices, parallel to boxes
        :return: (image, boxes, labels), shifted or unchanged
        """
        center = (boxes[:, 2:] + boxes[:, :2]) / 2  # box centers (pixels)
        if random.random() < 0.5:
            height, width, c = bgr.shape
            # canvas pre-filled with the fill color ("shfit" typo kept from original)
            after_shfit_image = np.zeros((height, width, c), dtype=bgr.dtype)
            after_shfit_image[:, :, :] = (104, 117, 123)
            shift_x = random.uniform(-width * 0.2, width * 0.2)
            shift_y = random.uniform(-height * 0.2, height * 0.2)
            # Copy the overlapping region; one case per sign combination of the shift.
            if shift_x >= 0 and shift_y >= 0:
                after_shfit_image[int(shift_y):, int(shift_x):, :] = bgr[:height - int(shift_y), :width - int(shift_x), :]
            elif shift_x >= 0 and shift_y < 0:
                after_shfit_image[:height + int(shift_y), int(shift_x):, :] = bgr[-int(shift_y):, :width - int(shift_x), :]
            elif shift_x < 0 and shift_y >= 0:
                after_shfit_image[int(shift_y):, :width + int(shift_x), :] = bgr[:height - int(shift_y), -int(shift_x):, :]
            elif shift_x < 0 and shift_y < 0:
                after_shfit_image[:height + int(shift_y), :width + int(shift_x), :] = bgr[-int(shift_y):, -int(shift_x):, :]
            # Move the box centers by the (truncated) shift and keep only the
            # boxes whose centers remain strictly inside the image.
            shift_xy = torch.FloatTensor([[int(shift_x), int(shift_y)]]).expand_as(center)
            center = center + shift_xy
            mask1 = (center[:, 0] > 0) & (center[:, 0] < width)
            mask2 = (center[:, 1] > 0) & (center[:, 1] < height)
            mask = (mask1 & mask2).view(-1, 1)
            boxes_in = boxes[mask.expand_as(boxes)].view(-1, 4)
            if len(boxes_in) == 0:
                # every box was shifted out -> abandon this augmentation
                return bgr, boxes, labels
            box_shift = torch.FloatTensor([[int(shift_x), int(shift_y), int(shift_x), int(shift_y)]]).expand_as(boxes_in)
            boxes_in = boxes_in + box_shift
            labels_in = labels[mask.view(-1)]
            return after_shfit_image, boxes_in, labels_in
        return bgr, boxes, labels
def randomScale(self, bgr, boxes):
if random.random() < 0.5:
scale = random.uniform(0.8, 1.2)
height, width, c = bgr.shape
bgr = cv2.resize(bgr, (int(width * scale), height))
scale_tensor = torch.FloatTensor(
[[scale, 1, scale, 1]]).expand_as(boxes)
boxes = boxes * scale_tensor
return bgr, boxes
return bgr, boxes
def randomCrop(self, bgr, boxes, labels):
if random.random() < 0.5:
center = (boxes[:, 2:] + boxes[:, :2]) / 2
height, width, c = bgr.shape
h = random.uniform(0.6 * height, height)
w = random.uniform(0.6 * width, width)
x = random.uniform(0, width - w)
y = random.uniform(0, height - h)
x, y, h, w = int(x), int(y), int(h), int(w)
center = center - torch.FloatTensor([[x, y]]).expand_as(center)
mask1 = (center[:, 0] > 0) & (center[:, 0] < w)
mask2 = (center[:, 1] > 0) & (center[:, 1] < h)
mask = (mask1 & mask2).view(-1, 1)
boxes_in = boxes[mask.expand_as(boxes)].view(-1, 4)
if (len(boxes_in) == 0):
return bgr, boxes, labels
box_shift = torch.FloatTensor([[x, y, x, y]]).expand_as(boxes_in)
boxes_in = boxes_in - box_shift
boxes_in[:, 0] = boxes_in[:, 0].clamp_(min=0, max=w)
boxes_in[:, 2] = boxes_in[:, 2].clamp_(min=0, max=w)
boxes_in[:, 1] = boxes_in[:, 1].clamp_(min=0, max=h)
boxes_in[:, 3] = boxes_in[:, 3].clamp_(min=0, max=h)
labels_in = labels[mask.view(-1)]
img_croped = bgr[y:y + h, x:x + w, :]
return img_croped, boxes_in, labels_in
return bgr, boxes, labels
def subMean(self, bgr, mean):
mean = np.array(mean, dtype=np.float32)
bgr = bgr - mean
return bgr
def random_flip(self, im, boxes):
if random.random() < 0.5:
im_lr = np.fliplr(im).copy()
h, w, _ = im.shape
xmin = w - boxes[:, 2]
xmax = w - boxes[:, 0]
boxes[:, 0] = xmin
boxes[:, 2] = xmax
return im_lr, boxes
return im, boxes
def random_bright(self, im, delta=16):
alpha = random.random()
if alpha > 0.3:
im = im * alpha + random.randrange(-delta, delta)
im = im.clip(min=0, max=255).astype(np.uint8)
return im