I am transforming a given input image for my YOLOv3 with the following albumentations transforms:
# Define the transformations that were applied
test_transforms = A.Compose(
    [
        A.LongestMaxSize(max_size=IMAGE_SIZE),
        A.PadIfNeeded(
            min_height=IMAGE_SIZE, min_width=IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT
        ),
        A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255),
    ],
    bbox_params=A.BboxParams(format="yolo", min_visibility=0.4, label_fields=[]),
)
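For my 1280×720 inputs with IMAGE_SIZE = 608, for example, LongestMaxSize scales by 608/1280 = 0.475 to a 608×342 image, and PadIfNeeded (centered by default) then adds 133 constant rows at the top and bottom to reach 608×608, so only the vertical axis gets padded.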
I need to invert the padding for both the image and the bounding boxes to recover the original image and boxes, but I don't see an inverse operation for albumentations transforms.
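In principle both transforms are deterministic, so the scale and padding should be recoverable analytically instead of from the image content. A minimal sketch of that idea (invert_yolo_bboxes is my own helper; it assumes PadIfNeeded's default centered placement, which puts any odd leftover pixel on the bottom/right edge):

def invert_yolo_bboxes(bboxes, orig_w, orig_h, image_size=608):
    # Undo LongestMaxSize + centered PadIfNeeded for YOLO-format boxes.
    # bboxes are (x, y, w, h, class) normalized to the padded square canvas.
    scale = image_size / max(orig_w, orig_h)
    new_w, new_h = round(orig_w * scale), round(orig_h * scale)  # size after LongestMaxSize
    pad_left = (image_size - new_w) // 2
    pad_top = (image_size - new_h) // 2
    inverted = []
    for x, y, w, h, c in bboxes:
        # Go to absolute pixels on the padded canvas, strip the padding,
        # then renormalize by the resized (un-padded) dimensions. YOLO
        # coordinates are relative, so no image resize is needed for boxes.
        inverted.append((
            (x * image_size - pad_left) / new_w,
            (y * image_size - pad_top) / new_h,
            w * image_size / new_w,
            h * image_size / new_h,
            c,
        ))
    return inverted

Since this avoids int() rounding entirely, it should round-trip exactly.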
I tried the following code to reverse the transformation process, but some values end up off by 1 or 2:
import os.path
import numpy as np
import albumentations as A
import cv2
import torch
from albumentations.pytorch import ToTensorV2
from PIL import Image, ImageFile
import config
def yolo_to_xml_bbox_xyxy(bbox, w, h):
    # bbox: normalized x_center, y_center, width, height, class
    w_half_len = (bbox[2] * w) / 2
    h_half_len = (bbox[3] * h) / 2
    xmin = int((bbox[0] * w) - w_half_len)
    ymin = int((bbox[1] * h) - h_half_len)
    xmax = int((bbox[0] * w) + w_half_len)
    ymax = int((bbox[1] * h) + h_half_len)
    return (xmin, ymin, xmax, ymax, bbox[4])
def xml_bbox_xyxy_to_yolo_bbox(bbox, w, h):
    # bbox: xmin, ymin, xmax, ymax, class -> normalized x_center, y_center, width, height, class
    xmin, ymin, xmax, ymax, c = bbox
    x = (xmin + xmax) / 2.0 / w
    y = (ymin + ymax) / 2.0 / h
    w = (xmax - xmin) / float(w)
    h = (ymax - ymin) / float(h)
    return (x, y, w, h, c)
def yolo_to_xml_bbox_xyxy_np(bbox, w, h):
    # Same as yolo_to_xml_bbox_xyxy, but returns a list instead of a tuple
    w_half_len = (bbox[2] * w) / 2
    h_half_len = (bbox[3] * h) / 2
    xmin = int((bbox[0] * w) - w_half_len)
    ymin = int((bbox[1] * h) - h_half_len)
    xmax = int((bbox[0] * w) + w_half_len)
    ymax = int((bbox[1] * h) + h_half_len)
    return [xmin, ymin, xmax, ymax, bbox[4]]
# Original image dimensions and the model input size
original_width = 1280
original_height = 720
IMAGE_SIZE = 608
# Check if the inverse transformation is correct for all labels annotations
labels_path = config.DATASET + "/labels"
labels_fns = os.listdir(labels_path)
labels_pathswe = [os.path.join(labels_path, os.path.splitext(label_fn)[0]) for label_fn in labels_fns]
# Define the transformations that were applied
test_transforms = A.Compose(
    [
        A.LongestMaxSize(max_size=IMAGE_SIZE),
        A.PadIfNeeded(
            min_height=IMAGE_SIZE, min_width=IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT
        ),
        A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255),
        # ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format="yolo", min_visibility=0.4, label_fields=[]),
)
for label_pathwe in labels_pathswe:
    print("Label: ", label_pathwe)
    annotations = np.loadtxt(fname=label_pathwe + ".txt", delimiter=" ", ndmin=2)
    annotation_base = annotations[:, 0:5]
    contacts = annotations[:, 5:6]
    annotation_unit_vec_xy = annotations[:, 6:8]
    annotation_unit_vec_mag = annotations[:, 8:9]
    # Move the class id from column 0 to column 4 (x, y, w, h, class)
    bboxes_base = np.roll(annotation_base, 4, axis=1)
    img_path = label_pathwe.replace("labels", "images") + ".jpg"
    org_image = np.array(Image.open(img_path).convert("RGB"))
    org_xyxy_boxes = []
    for bbox in bboxes_base.tolist():
        org_xyxy_boxes.append(yolo_to_xml_bbox_xyxy(bbox, original_width, original_height))
    augmentations = test_transforms(image=org_image, bboxes=bboxes_base)
    tf_image = augmentations["image"]
    transformed_bbox = augmentations["bboxes"]
    # Inverse transformation: resize back to the original resolution.
    # Applying A.Normalize here would divide by 255 a second time; undoing
    # the forward normalization requires multiplying by 255 instead.
    inverted_transform = A.Compose(
        [
            A.Resize(height=original_height, width=original_width, interpolation=cv2.INTER_LINEAR),
        ],
        bbox_params=A.BboxParams(format="yolo", min_visibility=0.4, label_fields=[]),
    )
# Find non-padded region of image
    non_padded_rows = np.where(tf_image[:, :, 0].sum(axis=1) > 0)[0]
    non_padded_cols = np.where(tf_image[:, :, 0].sum(axis=0) > 0)[0]
    top_row, bottom_row = non_padded_rows[0], non_padded_rows[-1]
    left_col, right_col = non_padded_cols[0], non_padded_cols[-1]
    # Crop the image; the +1 keeps the last non-padded row/column
    # (dropping it is one source of off-by-one errors)
    cropped_image = tf_image[top_row:bottom_row + 1, left_col:right_col + 1, :]
    new_bboxes = []
    for bbox in transformed_bbox:
        # Convert the padded-canvas YOLO bbox to absolute xyxy pixels
        xyxy_bbox = yolo_to_xml_bbox_xyxy(bbox, tf_image.shape[1], tf_image.shape[0])
        xmin, ymin, xmax, ymax, c = xyxy_bbox
        # Shift the coordinates into the cropped (un-padded) frame
        new_x1 = xmin - left_col
        new_y1 = ymin - top_row
        new_x2 = xmax - left_col
        new_y2 = ymax - top_row
        # Re-normalize against the cropped image dimensions
        yolo_inv_bbox = xml_bbox_xyxy_to_yolo_bbox(
            [new_x1, new_y1, new_x2, new_y2, c],
            cropped_image.shape[1], cropped_image.shape[0],
        )
        new_bboxes.append(yolo_inv_bbox)
    # Resize the cropped (not the padded) image back to the original size;
    # new_bboxes are normalized relative to the cropped frame
    augmentations_inv = inverted_transform(image=cropped_image, bboxes=new_bboxes)
    # Undo the forward Normalize, which divided the pixels by 255
    invtf_image = (augmentations_inv["image"] * 255).astype(np.uint8)
    invtransformed_bboxes = augmentations_inv["bboxes"]
    inv_org_xyxy_boxes = []
    for bbox in invtransformed_bboxes:
        inv_org_xyxy_boxes.append(yolo_to_xml_bbox_xyxy(bbox, original_width, original_height))
    if len(org_xyxy_boxes) != len(inv_org_xyxy_boxes):
        print("Original and inverted annotations not matching!")
        continue
    theyAreSame = True
    for bbidx, org_bbox in enumerate(org_xyxy_boxes):
        # Compare each original box with its inverted counterpart
        if tuple(org_bbox) != tuple(inv_org_xyxy_boxes[bbidx]):
            theyAreSame = False
            break
    if not theyAreSame:
        print("Original and inverted annotations not matching!")
    else:
        print("Ok!")