我有一个图像字幕数据集,其中每个样本都由一个图像和一个字幕列表组成。
这是一个视觉示例:
我正在使用 PyTorch,并创建了自定义的 `Dataset` 和 `DataLoader`,用于训练模型和执行评估。
让 `Dataset` 和 `DataLoader` 同时处理这两种情况的最佳方法是什么?也就是说:训练时只提供一个随机选取的字幕作为标签,而评估时提供全部字幕,用于计算多参考指标。
from torch.utils.data import Dataset, DataLoader
class MyDataset(Dataset):
    """Image-captioning dataset read from a ``<split>.json`` annotation file.

    The JSON file must hold an object at top level; its values become the
    samples. Each sample is read through the keys ``'img_url'`` (a path
    opened with PIL) and ``'visual_sentences'`` (the captions of that image).
    """

    def __init__(self, root, split, image_transform, processor):
        """Load ``<root>/<split>.json`` and keep its values as the sample list.

        Args:
            root: Directory containing the per-split JSON files.
            split: Split name, also used as the JSON file stem. Any value
                other than ``'train'`` makes ``__getitem__`` attach the full
                caption list to each sample.
            image_transform: Callable applied to the RGB PIL image before the
                processor, or ``None`` to skip it.
            processor: Callable invoked as ``processor(images=..., text=...,
                padding="max_length", return_tensors="pt")`` whose result
                supports ``.items()``.
        """
        annotation_file = pl.Path(root) / '{}.json'.format(split)
        # JSON is UTF-8 by specification; do not rely on the locale default,
        # which would garble non-ASCII captions on some platforms.
        with open(annotation_file, encoding='utf-8') as f:
            records = json.load(f)
        self.data = list(records.values())
        self.split = split
        self.image_transform = image_transform
        self.processor = processor

    def __len__(self):
        """Return the number of samples.

        Map-style datasets need this so that ``len(dataset)`` and the default
        ``DataLoader`` samplers work.
        """
        return len(self.data)

    def __getitem__(self, i):
        """Return the processed sample at index ``i``.

        Returns:
            A dict with the processor outputs for the image and one randomly
            chosen caption, each with the leading batch dimension removed.
            When ``split != 'train'`` the dict also has a ``'labels'`` entry
            holding the full list of captions for the image.
        """
        record = self.data[i]
        image = Image.open(record['img_url']).convert('RGB')
        labels = record['visual_sentences']

        if self.image_transform is not None:
            image = self.image_transform(image)

        # randomly sample one visual sentence (kept as a one-element list)
        caption = random.sample(labels, 1)
        encoding = self.processor(
            images=image,
            text=caption,
            padding="max_length",
            return_tensors="pt",
        )

        # Remove only the leading batch dimension. A bare ``squeeze()`` would
        # also drop any other dimension that happens to have size 1.
        encoding = {k: v.squeeze(0) for k, v in encoding.items()}

        # add all the labels if not in training
        # NOTE(review): ``labels`` is a plain list whose length may differ per
        # sample; the default collate function will presumably not batch it as
        # intended, so a custom ``collate_fn`` is likely needed - confirm.
        if self.split != 'train':
            encoding['labels'] = labels
        return encoding
class MyDataLoader(BaseDataLoader):
    """Data loader that builds a ``MyDataset`` for one split.

    Construction creates a resize transform and a pretrained processor, wraps
    them in a ``MyDataset`` and forwards that dataset together with the
    batching options to ``BaseDataLoader``.
    """

    def __init__(
        self,
        data_dir,
        batch_size,
        split,
        shuffle=True,
        validation_split=0.0,
        num_workers=1,
        processor=None,
    ):
        """Create the dataset for ``split`` and initialise the base loader.

        Args:
            data_dir: Directory handed to ``MyDataset`` as its root; also
                stored on ``self.data_dir``.
            batch_size: Forwarded unchanged to ``BaseDataLoader.__init__``.
            split: Split name handed to ``MyDataset``.
            shuffle: Forwarded unchanged to ``BaseDataLoader.__init__``.
            validation_split: Forwarded unchanged to
                ``BaseDataLoader.__init__``.
            num_workers: Forwarded unchanged to ``BaseDataLoader.__init__``.
            processor: Passed to ``AutoProcessor.from_pretrained``; presumably
                a model name or path. The ``None`` default is passed through
                as is and is not special-cased here.
        """
        # Every image is resized to 224x224 before it reaches the processor.
        resize_to_fixed = transforms.Compose([transforms.Resize((224, 224))])
        loaded_processor = AutoProcessor.from_pretrained(processor)

        self.data_dir = data_dir
        self.dataset = MyDataset(
            data_dir,
            split,
            image_transform=resize_to_fixed,
            processor=loaded_processor,
        )

        super().__init__(
            self.dataset,
            batch_size,
            shuffle,
            validation_split,
            num_workers,
        )