我有以下用于图像分割任务的自定义数据集类。
class LoadDataset(Dataset):
def __init__(self, img_dir, mask_dir, apply_transforms = None):
self.img_dir = img_dir
self.mask_dir = mask_dir
self.transforms = apply_transforms
self.img_paths, self.mask_paths = self.__get_all_paths()
self.__pil_to_tensor = transforms.PILToTensor()
self.__float_tensor = transforms.ToDtype(torch.float32, scale = True)
self.__grayscale = transforms.Grayscale()
def __get_all_paths(self):
img_paths = [os.path.join(self.img_dir, img_name.name) for img_name in os.scandir(self.img_dir) if os.path.isfile(img_name)]
mask_paths = [os.path.join(self.mask_dir, mask_name.name) for mask_name in os.scandir(self.mask_dir) if os.path.isfile(mask_name)]
img_paths = sorted(img_paths)
mask_paths = sorted(mask_paths)
return img_paths, mask_paths
def __len__(self):
return len(self.img_paths)
def __getitem__(self, index):
img_path, mask_path = self.img_paths[index], self.mask_paths[index]
img_PIL = Image.open(img_path)
mask_PIL = Image.open(mask_path)
img_tensor = self.__pil_to_tensor(img_PIL)
img_tensor = self.__float_tensor(img_tensor)
mask_tensor = self.__pil_to_tensor(mask_PIL)
mask_tensor = self.__float_tensor(mask_tensor)
mask_tensor = self.__grayscale(mask_tensor)
if self.transforms:
img_tensor, mask_tensor = self.transforms(img_tensor, mask_tensor)
return img_tensor, mask_tensor
当我应用以下转换时:
transforms.RandomHorizontalFlip()
图像或蒙版正在翻转。但是如果我将
__getitem__
中的转换顺序更改为以下内容,那么它就可以正常工作。
def __getitem__(self, index):
img_path, mask_path = self.img_paths[index], self.mask_paths[index]
img_PIL = Image.open(img_path)
mask_PIL = Image.open(mask_path)
if self.transforms:
img_PIL, mask_PIL = self.transforms(img_PIL, mask_PIL)
img_tensor = self.__pil_to_tensor(img_PIL)
mask_tensor = self.__pil_to_tensor(mask_PIL)
img_tensor = self.__float_tensor(img_tensor)
mask_tensor = self.__float_tensor(mask_tensor)
mask_tensor = self.__grayscale(mask_tensor)
return img_tensor, mask_tensor
顺序转换重要吗?我使用
torchvision.transforms.v2
进行所有转换。
是的,转换的顺序很重要。在这种情况下,到张量的变换会产生影响。当
v2.RandomHorizontalFlip
被赋予两个张量时,翻转将独立应用。然而,当给出两个 PIL 图像时,相同的变换将应用于两个图像,从而保持图像和掩模对齐。
为了获得更一致的处理,您可以尝试使用
TVTensors
进行数据增强。使用它们,您可以在转换每个数据输入之前指定它们的类型。例如:
from torchvision import tv_tensors
img_tensor = tv_tensors.Image(img_tensor)
mask_tensor= tv_tensors.Mask(mask_tensor)