I am trying to create a multi-scale CNN, but I am facing this error: RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x4095 and 4096x4096)


Here is my model:

import torch
import torch.nn as nn
import torch.nn.functional as F

# Define the shallow CNN
class ShallowCNN(nn.Module):
    def __init__(self, in_channels, out_dim):
        super(ShallowCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        # Use a dummy input to calculate the flattened size
        self.flatten_size = self._get_flattened_size(in_channels)

        self.fc = nn.Linear(self.flatten_size, out_dim)

    def _get_flattened_size(self, in_channels):
        # Create a dummy tensor to calculate the size after conv/pool layers
        dummy_input = torch.zeros(1, in_channels, 32, 32)
        output = self.pool(F.relu(self.conv2(F.relu(self.conv1(dummy_input)))))
        return output.numel()  # Return the number of elements in the flattened tensor

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = F.relu(self.fc(x))
        return x


class DeepCNN(nn.Module):
    def __init__(self, out_dim):
        super(DeepCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )

        # Use a dummy input to calculate the flattened size
        self.flatten_size = self._get_flattened_size()

        self.fc = nn.Linear(self.flatten_size, out_dim)

    def _get_flattened_size(self):
        # Create a dummy tensor to calculate the size after conv/pool layers
        dummy_input = torch.zeros(1, 3, 32, 32)
        output = self.features(dummy_input)
        return output.numel()  # Return the number of elements in the flattened tensor

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = F.relu(self.fc(x))
        return x


class MultiScaleCNN(nn.Module):
    def __init__(self, out_dim):
        super(MultiScaleCNN, self).__init__()
        self.shallow1 = ShallowCNN(3, out_dim // 3)
        self.shallow2 = ShallowCNN(3, out_dim // 3)
        self.deep = DeepCNN(out_dim // 3)

        # Output size after concatenating the embeddings from all three networks
        self.fc = nn.Linear(out_dim, out_dim)

    def forward(self, x):
        # Pass through shallow and deep networks
        x1 = self.shallow1(x)
        x2 = self.shallow2(x)
        x3 = self.deep(x)

        # Combine outputs
        x_combined = torch.cat([x1, x2, x3], dim=1)

        # Final fully connected layer
        x_out = F.relu(self.fc(x_combined))
        return x_out




# Define the Siamese Network
class SiameseNetwork(nn.Module):
    def __init__(self, embedding_dim):
        super(SiameseNetwork, self).__init__()
        self.multi_scale_cnn = MultiScaleCNN(embedding_dim)

    def forward(self, input1, input2):
        # Generate embeddings for both inputs
        output1 = self.multi_scale_cnn(input1)
        output2 = self.multi_scale_cnn(input2)
        return output1, output2

# Instantiate the model
embedding_dim = 4096  # As described in the paper
model = SiameseNetwork(embedding_dim=embedding_dim)
print(model)


from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
from tqdm import tqdm
from PIL import Image
import numpy as np  # needed for np.random.choice / np.where in the dataset below
import torch
from torchvision import transforms

# 1. Contrastive Loss Function
class ContrastiveLoss(nn.Module):
    def __init__(self, margin=1.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        # Calculate Euclidean distance
        euclidean_distance = F.pairwise_distance(output1, output2)
        # Loss function
        loss = torch.mean((1 - label) * torch.pow(euclidean_distance, 2) +
                          label * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0), 2))
        return loss

# 2. Prepare Pair Dataset
class SiameseCIFAR10Dataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        anchor_image = self.images[idx]
        anchor_label = self.labels[idx]

        # Positive Pair: Choose an image with the same label
        positive_idx = np.random.choice(np.where(self.labels == anchor_label)[0])
        positive_image = self.images[positive_idx]

        # Negative Pair: Choose an image with a different label
        negative_idx = np.random.choice(np.where(self.labels != anchor_label)[0])
        negative_image = self.images[negative_idx]

        # Convert numpy array to PIL Image
        anchor_image = Image.fromarray((anchor_image * 255).astype(np.uint8))
        positive_image = Image.fromarray((positive_image * 255).astype(np.uint8))
        negative_image = Image.fromarray((negative_image * 255).astype(np.uint8))

        # Apply transformations
        if self.transform:
            anchor_image = self.transform(anchor_image)
            positive_image = self.transform(positive_image)
            negative_image = self.transform(negative_image)

        # Return anchor-positive and anchor-negative pairs
        return (anchor_image, positive_image, torch.tensor(0)), (anchor_image, negative_image, torch.tensor(1))


# Define the transformation
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Create training dataset and loader for the Siamese network
# (train_data / train_labels are the preprocessed CIFAR-10 arrays mentioned below)
train_pairs_dataset = SiameseCIFAR10Dataset(train_data, train_labels, transform)
train_loader = DataLoader(train_pairs_dataset, batch_size=32, shuffle=True)


# 3. Training Loop
def train_siamese_network(model, train_loader, criterion, optimizer, epochs, device):
    model.to(device)
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for (anchor, positive, label_pos), (anchor_neg, negative, label_neg) in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{epochs}"):
            # Move data to device
            anchor = anchor.to(device)
            positive = positive.to(device)
            negative = negative.to(device)
            label_pos = label_pos.float().to(device)
            label_neg = label_neg.float().to(device)

            # Zero gradients
            optimizer.zero_grad()

            # Forward pass for positive pair
            output1_pos, output2_pos = model(anchor, positive)  # <-- this line raises the RuntimeError
            loss_pos = criterion(output1_pos, output2_pos, label_pos)

            # Forward pass for negative pair
            output1_neg, output2_neg = model(anchor_neg, negative)
            loss_neg = criterion(output1_neg, output2_neg, label_neg)

            # Combine losses and backward pass
            loss = loss_pos + loss_neg
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Epoch Loss
        print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader):.4f}")


# 4. Initialize Model, Loss, and Optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SiameseNetwork(embedding_dim=4096)
criterion = ContrastiveLoss(margin=1.0)
optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)

# 5. Train the Model
train_siamese_network(model, train_loader, criterion, optimizer, epochs=10, device=device)

The marked line above raises the error. How can I fix it? I am trying to run a Siamese network on the CIFAR-10 dataset. I have already preprocessed and extracted the dataset, but because of this error I cannot train on it.

Tags: python, machine-learning, pytorch, neural-network
1 Answer

When constructing MultiScaleCNN, you want to split the embedding dim into 3 parts, but 4096 is not divisible by 3: each sub-network is given out_dim // 3 = 1365 output dimensions, and concatenating the three branches yields 1365 * 3 = 4095 features, while self.fc expects 4096 (hence mat1 32x4095 against mat2 4096x4096). As a quick fix, initialize DeepCNN with the remaining dimensions, out_dim - (out_dim // 3) * 2, so that the three embeddings sum to exactly out_dim, as sketched below.
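
A minimal sketch of that quick fix, assuming ShallowCNN and DeepCNN stay exactly as defined in the question; only the MultiScaleCNN constructor changes:

class MultiScaleCNN(nn.Module):
    def __init__(self, out_dim):
        super(MultiScaleCNN, self).__init__()
        part = out_dim // 3        # 4096 // 3 = 1365
        rest = out_dim - 2 * part  # 4096 - 2 * 1365 = 1366
        self.shallow1 = ShallowCNN(3, part)
        self.shallow2 = ShallowCNN(3, part)
        self.deep = DeepCNN(rest)  # the deep branch absorbs the remainder

        # part + part + rest == out_dim, so the concatenated embedding
        # now matches the fc layer's in_features exactly
        self.fc = nn.Linear(out_dim, out_dim)

    def forward(self, x):
        x1 = self.shallow1(x)
        x2 = self.shallow2(x)
        x3 = self.deep(x)
        x_combined = torch.cat([x1, x2, x3], dim=1)  # shape: (batch, out_dim)
        return F.relu(self.fc(x_combined))

Alternatively, keep all three branches at out_dim // 3 and size the final layer from what they actually produce, e.g. self.fc = nn.Linear(3 * (out_dim // 3), out_dim). Either way, a quick sanity check such as model(torch.randn(2, 3, 32, 32), torch.randn(2, 3, 32, 32)) should now run without the shape mismatch.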
