我的预测输出不是我所期望的

问题描述 投票:0回答:0

我正在构建一个用于多类分类的 unet 架构。目标是识别图像上的食物(鸡蛋、西红柿、奶酪等)。我正在使用 Python 和 Pytorch 作为库。 我已经用我的 unet 架构运行了我的火车代码,但输出预测不是我所期望的。 输出仅显示一类值 1。有人可以帮我吗? 非常感谢

the output

这是我的 unet 架构:

def double_conv(in_channels, out_channels):
    conv = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
        nn.ReLU(inplace=True)
    )
    return conv


class UNet(nn.Module):
    def __init__(self, num_classes):
        super(UNet, self).__init__()

        # definition of max pooling :

        self.max_pool_2x2_1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.max_pool_2x2_2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.max_pool_2x2_3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.max_pool_2x2_4 = nn.MaxPool2d(kernel_size=2, stride=2)
        # self.max_pool_2x2_5 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.down_conv_1 = double_conv(3, 16)
        self.down_conv_2 = double_conv(16, 32)
        self.down_conv_3 = double_conv(32, 64)
        self.down_conv_4 = double_conv(64, 128)
        # self.down_conv_5 = double_conv(128, 256)
        # self.down_conv_6 = double_conv(256, 512)

        # self.up_conv_trans1 = nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=2, stride=2)
        # self.up_conv_1 = double_conv(512, 256)

        # self.up_conv_trans2 = nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=2, stride=2)
        # self.up_conv_2 = double_conv(256, 128)

        self.up_conv_trans3 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=2, stride=2)
        self.up_conv_3 = double_conv(128, 64)

        self.up_conv_trans4 = nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=2, stride=2)
        self.up_conv_4 = double_conv(64, 32)

        self.up_conv_trans5 = nn.ConvTranspose2d(in_channels=32, out_channels=16, kernel_size=2, stride=2)
        self.up_conv_5 = double_conv(32, 16)

        self.out = nn.Conv2d(in_channels=16, out_channels=num_classes,  kernel_size=1)

    def forward(self, image):

        out_conv1 = self.down_conv_1(image)
        out_pool1 = self.max_pool_2x2_1(out_conv1)
        out_conv2 = self.down_conv_2(out_pool1)
        out_pool2 = self.max_pool_2x2_2(out_conv2)
        out_conv3 = self.down_conv_3(out_pool2)
        out_pool3 = self.max_pool_2x2_3(out_conv3)
        out_conv4 = self.down_conv_4(out_pool3)
        # out_pool4 = self.max_pool_2x2_4(out_conv4)
        # out_conv5 = self.down_conv_5(out_pool4)

        # decoder part

        # out_up_conv = self.up_conv_trans2(out_conv5)
        # out_up_conv = self.up_conv_2(torch.cat([out_up_conv, out_conv4], 1))

        out_up_conv = self.up_conv_trans3(out_conv4)
        out_up_conv = self.up_conv_3(torch.cat([out_up_conv, out_conv3], 1))

        out_up_conv = self.up_conv_trans4(out_up_conv)
        out_up_conv = self.up_conv_4(torch.cat([out_up_conv, out_conv2], 1))

        out_up_conv = self.up_conv_trans5(out_up_conv)
        out_up_conv = self.up_conv_5(torch.cat([out_up_conv, out_conv1], 1))

        out_up_conv = self.out(out_up_conv)
        return out_up_conv

这是我的训练代码:

import torch.nn.functional as F
import os
import torch.optim as optim
import numpy as np
from UNET_19classes import *
import random
from data_augmentation import transform


def check_nan(model, input):
    output = model(input)
    return torch.isnan(output).any()

# Sauvegarder le meilleur modèle
def save_model(epoch, model, optimizer, loss):
    print(f"save final model")
    torch.save({
        'epoch': epoch,
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'loss': loss,
    }, f'C:/Users/ne/Desktop/food-recognition/modele_sauvegarder/modele_{epoch}_{loss}.pth')

# Définition des classes
classes = ['background', 'bun', 'laitue', 'tomates', 'cheddar', 'hamburger',
    'sauce', 'oignons', 'cornichon', 'fromage', 'poulet',
    'fish', 'bacon', 'oeuf', 'oignon cuit', 'pomme de terre / frite',
    'avocat', 'crevette', 'poulet pané/tenders', 'champignon']


# Convertir la liste de classes en un objet PyTorch Tensor
class_to_idx = {classes[i]: i for i in range(len(classes))}
idx_to_class = {i: classes[i] for i in range(len(classes))}
class_to_idx_tensor = torch.tensor(list(class_to_idx.values()))


# Définition du modèle UNet
model = UNet(num_classes=20)

# Charger les données d'entraînement et de test
train_source = np.load("images_burger_train_set.npy")
train_source = train_source[..., ::-1]
# train_source = train_source / 255.0
test_source = np.load("images_burger_test_set.npy")
train_target = np.load("mapped_masks.npy")
test_target = np.load("mapped_masks_test.npy")

# Mise aux dimensions correctes [N, C, H, W]
train_target = np.expand_dims(train_target, axis=3)
test_target = np.expand_dims(test_target, axis=3)
arr_zeros = np.zeros((156, 256, 256, 2))
arr_zeros2 = np.zeros((45, 256, 256, 2))
train_target = np.concatenate((train_target, arr_zeros), axis=-1)
test_target = np.concatenate((test_target, arr_zeros2), axis=-1)

# Définition de la fonction perte et de l'optimiseur
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Paramètres généraux
num_epochs = 30

correct = 0
total = 0

# Entraînement sur CUDA du modèle
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.cuda()



# Boucle d'entraînement principale
for epoch in range(num_epochs):
    # Phase d'entraînement
    model.train()
    batch_size = 12
    train_acc = 0.0

    # Paramètres d'entraînement
    number_slice_trainset = list(range(156))
    index_train = 0
    number_batch_train = 156 // batch_size
    random.shuffle(number_slice_trainset)
    num_correct_train = 0
    train_loss_array = []
    train_accuracy_array = np.array([0])
    train_loss = 0.0

    # Paramètres de test
    test_epoch_loss = 0
    number_slice_testset = list(range(45))
    index_test = 0
    number_batch_test = 45 // batch_size
    random.shuffle(number_slice_testset)
    num_correct_test = 0
    test_loss_array = []
    test_accuracy_array = np.array([0])
    best_test_loss = np.finfo(np.float32).max
    val_loss = 0.0

    # Boucle d'entraînement sur les lots/batch
    for data in range(number_batch_train):
        # Préparation des données
        batch_train_source_numpy = train_source[number_slice_trainset[index_train:index_train + batch_size], :, :]
        batch_train_source_numpy = np.transpose(batch_train_source_numpy, (0, 3, 1, 2))
        batch_train_source_numpy = batch_train_source_numpy/255.0
        batch_train_target_numpy = train_target[number_slice_trainset[index_train:index_train + batch_size], :, :]
        batch_train_target_numpy = np.swapaxes(batch_train_target_numpy, 1, 3)
        batch_train_target_numpy = np.swapaxes(batch_train_target_numpy, 2, 3)

        batch_train_source_cuda = torch.tensor(batch_train_source_numpy).to(device=device, dtype=torch.float32)
        batch_train_target_cuda = torch.tensor(batch_train_target_numpy).to(device=device, dtype=torch.float32)
        batch_train_target_cuda = batch_train_target_cuda.cuda().float()
        batch_train_target_cuda = torch.argmax(batch_train_target_cuda, dim=1)
        result = transform(batch_train_source_cuda, batch_train_target_cuda)

        batch_train_logits_cuda = model(result[0])

        batch_prediction_cuda = torch.softmax(batch_train_logits_cuda, dim=1)
        preds = batch_prediction_cuda.cpu().detach().numpy()
        # Conversion des probabilités en indice de classe
        preds_idx = np.argmax(preds, axis=1)

        # Remplacement des noms de classes par les indices correspondants
        names_for_each_pixel = preds_idx.astype(np.uint8)

        loss = loss_function(batch_train_logits_cuda, batch_train_target_cuda)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        train_loss += loss.item()
        index_train += batch_size

        preds = F.softmax(batch_train_logits_cuda, dim=1).argmax(dim=1)
        num_correct_train += (preds == batch_train_target_cuda).sum()

    train_loss /= number_batch_train
    print(f"epoch {epoch + 1}, train loss: {train_loss}")
    train_acc = num_correct_train / ((torch.numel(batch_train_logits_cuda)) * number_batch_train)


    # Test du modèle sur les données de validation
    model.eval()
    val_acc = 0.0

    # Boucle de validation sur les lots
    with torch.no_grad():
        for data in range(number_batch_test):
            batch_test_source_numpy = test_source[number_slice_testset[index_test:index_test + batch_size], :, :]
            batch_test_source_numpy = np.swapaxes(batch_test_source_numpy, 1, 3)

            batch_test_target_numpy = test_target[number_slice_testset[index_test:index_test + batch_size], :, :]
            batch_test_target_numpy = np.swapaxes(batch_test_target_numpy, 1, 3)

            batch_test_source_cuda = torch.tensor(batch_test_source_numpy).to(device=device, dtype=torch.float32)
            batch_test_target_cuda = torch.tensor(batch_test_target_numpy).to(device=device, dtype=torch.float32)
            batch_test_target_cuda = torch.argmax(batch_test_target_cuda, dim=1)

            outputs = model(batch_test_source_cuda)

            loss = loss_function(outputs, batch_test_target_cuda)
            test_epoch_loss += loss
            index_test += batch_size

            val_loss += loss.item()
            preds = F.softmax(outputs, dim=1).argmax(dim=1)
            acc = (preds == batch_test_target_cuda).float().mean()
            val_acc += acc.item()

    test_epoch_loss /= number_batch_test
    print(f"epoch {epoch + 1}, test loss: {test_epoch_loss}")
    test_loss_array = np.append(test_loss_array, test_epoch_loss.item())
    np.save(os.path.join(numpy_array_path, 'test_loss_array'), test_loss_array)
deep-learning pytorch multiclass-classification unet-neural-network
© www.soinside.com 2019 - 2024. All rights reserved.