I want to build a model (an RNN) on image sequences to predict a binary variable. I have seen plenty of tutorials on multi-input classification, but I could not find anything on how to import the data. I have a folder containing one folder of images per sequence, like this:
-- film_1
-------------film_1_image1
-------------film_1_image2
-------------film_1_image3
-------------film_1_image4
-- film_2
-------------film_2_image1
-------------film_2_image2
-------------film_2_image3
-------------film_2_image4
I know I need data with shape (batch, time, width, height), but I don't know how to get there. I looked for a solution in keras and in pytorch but found nothing. I would like to import my films, split the dataset into train/validation, and train the model.
Does anyone know how to import data organized like this?
I tried to use DataLoader, but I find it hard to understand how it works. So if anyone could help, that would be great.
Here is a PyTorch example for training an RNN on this kind of data. First, define a Dataset that loads each film folder as a stack of frames,
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
class ImageSequenceDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        # One sub-folder per film/sequence (film_1, film_2, ...)
        self.sequences = sorted(os.listdir(root_dir))

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        sequence_dir = os.path.join(self.root_dir, self.sequences[idx])
        images = sorted(os.listdir(sequence_dir))
        image_sequence = []
        for image_name in images:
            image_path = os.path.join(sequence_dir, image_name)
            image = Image.open(image_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
            image_sequence.append(image)
        # Stack the frames into one tensor of shape (time, channels, height, width)
        image_sequence = torch.stack(image_sequence)
        # Example: replace with your method to get the label
        label = self.get_label(sequence_dir)
        return image_sequence, label

    def get_label(self, sequence_dir):
        # Dummy implementation: replace it with your actual label logic
        if "positive" in sequence_dir:
            return 1
        else:
            return 0
Then you have to define your transforms,
transform = transforms.Compose([
transforms.Resize((128, 128)),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
Create the dataset and the DataLoader,
# Assuming the data is located in "path/to/your/data"
dataset = ImageSequenceDataset(root_dir="path/to/your/data", transform=transform)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4)
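You also mentioned splitting into train/validation. One option is torch.utils.data.random_split; this is just a sketch, and the train_loader / val_loader names are mine (the training loop below still iterates over dataloader, so swap in train_loader there if you use this),

from torch.utils.data import random_split

# 80/20 split over the sequences (adjust the fractions to your needs)
n_train = int(0.8 * len(dataset))
n_val = len(dataset) - n_train
train_set, val_set = random_split(dataset, [n_train, n_val])

train_loader = DataLoader(train_set, batch_size=8, shuffle=True, num_workers=4)
val_loader = DataLoader(val_set, batch_size=8, shuffle=False, num_workers=4)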
Now you can set up the RNN model, for example,
class RNNModel(nn.Module):
    def __init__(self):
        super(RNNModel, self).__init__()
        # Example CNN to process each frame
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # 64 channels * 16 * 16 spatial positions after three poolings of a 128x128 input
        self.rnn = nn.LSTM(input_size=64*16*16, hidden_size=128, num_layers=1, batch_first=True)
        self.fc = nn.Linear(128, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # x has shape (batch_size, seq_len, channels, height, width)
        batch_size, seq_len, c, h, w = x.size()
        cnn_out = []
        for t in range(seq_len):
            out = self.cnn(x[:, t, :, :, :])
            out = out.view(batch_size, -1)  # Flatten the CNN output per frame
            cnn_out.append(out)
        cnn_out = torch.stack(cnn_out, dim=1)  # Shape (batch_size, seq_len, features)
        rnn_out, _ = self.rnn(cnn_out)
        out = self.fc(rnn_out[:, -1, :])  # Use the last RNN output
        out = self.sigmoid(out)
        return out
Initialize the model and define the loss function and optimizer,
model = RNNModel()
criterion = nn.BCELoss() # Binary Cross Entropy Loss
optimizer = optim.Adam(model.parameters(), lr=0.001)
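Before training, you can sanity-check the tensor shapes by pushing a dummy batch through the model (this assumes 128x128 frames, matching the Resize transform above; with a different size you would need to adjust the LSTM input_size),

# Dummy batch: 2 sequences of 4 frames each, 3 channels, 128x128 pixels
dummy = torch.randn(2, 4, 3, 128, 128)
with torch.no_grad():
    print(model(dummy).shape)  # expected: torch.Size([2, 1])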
Then start training your model,
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(dataloader):
        # BCELoss expects float targets of shape (batch_size, 1)
        labels = labels.float().view(-1, 1)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(dataloader)}")
Hope this example code snippet helps you. Cheers!