这是我的模型:
import torch
import torch.nn as nn
import torch.nn.functional as F
# Define the shallow CNN
class ShallowCNN(nn.Module):
    """Two-convolution feature extractor followed by one linear projection.

    Args:
        in_channels: Number of channels in the input images.
        out_dim: Size of the embedding produced by the final linear layer.
        input_size: Spatial size of the input images, either an int for
            square images or a ``(height, width)`` pair. Defaults to 32,
            the size that used to be hard-coded.
    """

    def __init__(self, in_channels, out_dim, input_size=32):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        if isinstance(input_size, int):
            self.input_size = (input_size, input_size)
        else:
            self.input_size = tuple(input_size)
        # Probe the conv/pool stack once so the linear layer gets the right
        # number of input features for the configured image size.
        self.flatten_size = self._get_flattened_size(in_channels)
        self.fc = nn.Linear(self.flatten_size, out_dim)

    def _get_flattened_size(self, in_channels):
        """Return the number of features left after the conv/pool layers."""
        height, width = self.input_size
        # Shape probing only: no autograd graph is needed for this pass.
        with torch.no_grad():
            dummy_input = torch.zeros(1, in_channels, height, width)
            output = self.pool(F.relu(self.conv2(F.relu(self.conv1(dummy_input)))))
        # The batch size is 1, so numel() is the per-sample feature count.
        return output.numel()

    def forward(self, x):
        """Embed a batch of images into ``out_dim`` non-negative features."""
        x = F.relu(self.conv1(x))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # Flatten everything except the batch axis
        return F.relu(self.fc(x))
class DeepCNN(nn.Module):
    """Four-convolution feature extractor followed by one linear projection.

    Args:
        out_dim: Size of the embedding produced by the final linear layer.
        in_channels: Number of channels in the input images. Defaults to 3,
            the value that used to be hard-coded.
        input_size: Spatial size of the input images, either an int for
            square images or a ``(height, width)`` pair. Defaults to 32,
            the size that used to be hard-coded.
    """

    def __init__(self, out_dim, in_channels=3, input_size=32):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        )
        self.in_channels = in_channels
        if isinstance(input_size, int):
            self.input_size = (input_size, input_size)
        else:
            self.input_size = tuple(input_size)
        # Probe the feature stack once so the linear layer gets the right
        # number of input features for the configured image size.
        self.flatten_size = self._get_flattened_size()
        self.fc = nn.Linear(self.flatten_size, out_dim)

    def _get_flattened_size(self):
        """Return the number of features left after ``self.features``."""
        height, width = self.input_size
        # Shape probing only: no autograd graph is needed for this pass.
        with torch.no_grad():
            dummy_input = torch.zeros(1, self.in_channels, height, width)
            output = self.features(dummy_input)
        # The batch size is 1, so numel() is the per-sample feature count.
        return output.numel()

    def forward(self, x):
        """Embed a batch of images into ``out_dim`` non-negative features."""
        x = self.features(x)
        x = x.view(x.size(0), -1)  # Flatten everything except the batch axis
        return F.relu(self.fc(x))
class MultiScaleCNN(nn.Module):
    """Fuse two shallow branches and one deep branch into a single embedding.

    The three branch outputs are concatenated and passed through a final
    linear layer of size ``out_dim -> out_dim``.

    Args:
        out_dim: Size of the final embedding. It does not have to be
            divisible by 3: the deep branch absorbs the remainder.
    """

    def __init__(self, out_dim):
        super().__init__()
        shallow_dim = out_dim // 3
        # Give the deep branch whatever is left so the concatenated width is
        # exactly out_dim. With out_dim=4096, three branches of 4096 // 3
        # would only add up to 4095 and self.fc would reject the input.
        deep_dim = out_dim - 2 * shallow_dim
        self.shallow1 = ShallowCNN(3, shallow_dim)
        self.shallow2 = ShallowCNN(3, shallow_dim)
        self.deep = DeepCNN(deep_dim)
        # Input width equals the concatenation of all three branch outputs
        self.fc = nn.Linear(out_dim, out_dim)

    def forward(self, x):
        """Return the fused ``out_dim`` embedding for a batch of images."""
        # Every branch sees the same input batch
        x1 = self.shallow1(x)
        x2 = self.shallow2(x)
        x3 = self.deep(x)
        # Concatenate along the feature axis
        x_combined = torch.cat([x1, x2, x3], dim=1)
        # Final fully connected layer
        return F.relu(self.fc(x_combined))
# Define the Siamese Network
class SiameseNetwork(nn.Module):
    """Twin network that embeds both inputs with one shared MultiScaleCNN."""

    def __init__(self, embedding_dim):
        super().__init__()
        self.multi_scale_cnn = MultiScaleCNN(embedding_dim)

    def forward(self, input1, input2):
        """Return the pair of embeddings ``(output1, output2)``."""
        # The same module, and therefore the same weights, encodes both inputs
        encoder = self.multi_scale_cnn
        first_embedding = encoder(input1)
        second_embedding = encoder(input2)
        return first_embedding, second_embedding
# Build the Siamese model and print its layer structure
embedding_dim = 4096  # As described in the paper
model = SiameseNetwork(embedding_dim)
print(model)
import numpy as np
import torch
import torch.optim as optim
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from tqdm import tqdm
# 1. Contrastive Loss Function
class ContrastiveLoss(nn.Module):
    """Contrastive loss over pairs of embeddings.

    A label of 0 marks a pair that should be close (its squared distance is
    penalised); a label of 1 marks a pair that should be at least ``margin``
    apart (only the shortfall below the margin is penalised).

    Args:
        margin: Minimum distance wanted between embeddings of a pair
            labelled 1.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        """Return the mean contrastive loss of the batch.

        Args:
            output1: Embeddings of the first element of each pair.
            output2: Embeddings of the second element of each pair.
            label: Per-pair labels (0 or 1). Any numeric or bool dtype is
                accepted, and a shape such as ``(B, 1)`` is accepted as
                long as it has one entry per distance.
        """
        # Euclidean distance between the two embeddings of each pair
        distance = F.pairwise_distance(output1, output2)
        label = label.to(distance.dtype)
        # A (B, 1) label would broadcast against the (B,) distances into a
        # (B, B) matrix and silently change the loss, so align shapes first.
        if label.shape != distance.shape and label.numel() == distance.numel():
            label = label.reshape(distance.shape)
        pull_together = (1 - label) * torch.pow(distance, 2)
        push_apart = label * torch.pow(torch.clamp(self.margin - distance, min=0.0), 2)
        return torch.mean(pull_together + push_apart)
# 2. Prepare Pair Dataset
class SiameseCIFAR10Dataset(Dataset):
    """Dataset yielding one positive and one negative pair per anchor image.

    Args:
        images: Indexable collection of image arrays.
        labels: Class label of every image, in the same order as ``images``.
            Any array-like is accepted; it is stored as a NumPy array.
        transform: Optional callable applied to each PIL image.
    """

    def __init__(self, images, labels, transform=None):
        self.images = images
        # Comparing a plain Python list with a scalar yields a single bool,
        # which would leave no candidates to sample from; an ndarray gives
        # the element-wise mask that __getitem__ relies on.
        self.labels = np.asarray(labels)
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def _to_pil(self, image):
        """Convert one image array into a PIL image."""
        # assumes float pixel values in [0, 1] -- TODO confirm against the
        # preprocessing that produced the image arrays
        return Image.fromarray((image * 255).astype(np.uint8))

    def __getitem__(self, idx):
        """Return ``(anchor, positive, 0)`` and ``(anchor, negative, 1)``."""
        anchor_label = self.labels[idx]
        # Positive: a random image sharing the anchor's label (this may be
        # the anchor itself)
        positive_idx = np.random.choice(np.where(self.labels == anchor_label)[0])
        # Negative: a random image with any other label
        negative_idx = np.random.choice(np.where(self.labels != anchor_label)[0])

        anchor_image = self._to_pil(self.images[idx])
        positive_image = self._to_pil(self.images[positive_idx])
        negative_image = self._to_pil(self.images[negative_idx])

        if self.transform:
            anchor_image = self.transform(anchor_image)
            positive_image = self.transform(positive_image)
            negative_image = self.transform(negative_image)

        # Label 0 marks the same-class pair, label 1 the different-class pair
        positive_pair = (anchor_image, positive_image, torch.tensor(0))
        negative_pair = (anchor_image, negative_image, torch.tensor(1))
        return positive_pair, negative_pair
# Preprocessing: convert each PIL image to a tensor, then normalize every
# channel with mean 0.5 and std 0.5
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)
# Wrap the training arrays in the pair dataset and batch them for training
train_pairs_dataset = SiameseCIFAR10Dataset(train_data, train_labels, transform=transform)
train_loader = DataLoader(dataset=train_pairs_dataset, batch_size=32, shuffle=True)
# 3. Training Loop
def train_siamese_network(model, train_loader, criterion, optimizer, epochs, device):
    """Train a Siamese model on positive/negative pairs.

    Args:
        model: Callable taking two image batches and returning two embeddings.
        train_loader: Iterable of batches shaped
            ``((anchor, positive, label_pos), (anchor, negative, label_neg))``.
        criterion: Loss called as ``criterion(output1, output2, label)``.
        optimizer: Optimizer over the model's parameters.
        epochs: Number of passes over ``train_loader``.
        device: Device the model and every batch are moved to.
    """
    model.to(device)
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        progress = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs}")
        for (anchor, positive, label_pos), (anchor_neg, negative, label_neg) in progress:
            # Every tensor fed to the model must live on the model's device,
            # including the anchor copy that belongs to the negative pair.
            anchor = anchor.to(device)
            positive = positive.to(device)
            anchor_neg = anchor_neg.to(device)
            negative = negative.to(device)
            label_pos = label_pos.float().to(device)
            label_neg = label_neg.float().to(device)

            optimizer.zero_grad()

            # Forward pass for the positive pair
            output1_pos, output2_pos = model(anchor, positive)
            loss_pos = criterion(output1_pos, output2_pos, label_pos)

            # Forward pass for the negative pair
            output1_neg, output2_neg = model(anchor_neg, negative)
            loss_neg = criterion(output1_neg, output2_neg, label_neg)

            # One backward pass and one optimizer step on the summed loss
            loss = loss_pos + loss_neg
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Average over batches; max() avoids dividing by zero on an empty loader
        num_batches = max(len(train_loader), 1)
        print(f"Epoch {epoch + 1}, Loss: {running_loss / num_batches:.4f}")
# 4. Set up the device, model, loss and optimizer (GPU when available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SiameseNetwork(embedding_dim=4096)
criterion = ContrastiveLoss(margin=1.0)
optimizer = optim.SGD(params=model.parameters(), lr=1e-4, momentum=0.9)
# 5. Run the training loop for 10 epochs
train_siamese_network(
    model,
    train_loader,
    criterion,
    optimizer,
    epochs=10,
    device=device,
)
训练循环中 `output1_pos, output2_pos = model(anchor, positive)` 这一行(上文以粗斜体标出)在运行时报错,应该如何解决?我正在尝试在 CIFAR-10 数据集上训练孪生网络(Siamese Network)。数据集已经完成预处理和提取,但由于这个错误,训练无法进行。
在创建 `MultiScaleCNN` 时,您想把嵌入维度平均分成 3 份,但 4096 不能被 3 整除:每个子网络的输出维度是 `4096 // 3 = 1365`,三个输出拼接后只有 `1365 * 3 = 4095` 维,与 `nn.Linear(out_dim, out_dim)` 期望的 4096 维输入不匹配。一个快速的修复方法是:在初始化 `DeepCNN` 时传入 `out_dim - (out_dim // 3) * 2`,让它承担剩余的维度。