I'm building a CNN model to classify dogs and cats. I trained it, and when I evaluate its accuracy by hand it reaches 80-85% on unseen data.
However, when I try to compute the accuracy with torchmetrics.Accuracy, I get a wrong result for some reason. Let me explain:
The model code (I use Python, torch, and lightning to streamline the model and code):
import lightning as L
import torch
import torchmetrics
import torchvision
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from torchvision.transforms import ToTensor
from CustomDataset import CustomDataset
class Model(L.LightningModule):
    def __init__(self, batch_size, learning_rate, num_classes):
        super(Model, self).__init__()
        self.save_hyperparameters()
        ## HERE GOES MODEL LAYERS CRITERION etc
        self.accuracy = torchmetrics.Accuracy(num_classes=2, average='macro', task='multiclass')
        self.test_transform = transforms.Compose([
            transforms.Resize((200, 200)),  # Resize images to 200x200
            transforms.ToTensor(),  # Convert images to PyTorch tensors
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize images
        ])
        self.transform = transforms.Compose([
            transforms.RandomResizedCrop(200),  # Randomly crop and resize images to 200x200
            transforms.RandomHorizontalFlip(p=0.5),  # Randomly flip images horizontally
            transforms.RandomRotation(15),  # Randomly rotate images by up to 15 degrees
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
            transforms.ToTensor(),  # Convert images to PyTorch tensors
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize images
        ])
    def forward(self, image):
        image = F.relu(self.conv1(image))
        image = self.pool(image)
        image = F.relu(self.conv2(image))
        image = self.pool(image)
        image = F.relu(self.conv3(image))
        image = self.pool(image)  # Output is now (128, 25, 25)
        image = torch.flatten(image, 1)  # Flatten the output
        image = F.relu(self.fc1(image))
        image = self.fc2(image)
        return image
    def training_step(self, batch, batch_idx):
        images, labels = batch
        predictions = self(images)  # Forward pass
        loss = self.criterion(predictions, labels)  # Compute the loss
        predicted_classes = torch.argmax(F.softmax(predictions, dim=1), dim=1)
        predictions_softmax = F.softmax(predictions, dim=1)
        acc = self.accuracy(predictions_softmax, labels)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        self.log('train_acc', acc, on_step=True, on_epoch=True, prog_bar=True)
        return loss  # Returning the loss for backpropagation
    def validation_step(self, batch, batch_idx):
        images, labels = batch
        predictions = self(images)
        loss = self.criterion(predictions, labels)
        predicted_classes = torch.argmax(F.softmax(predictions, dim=1), dim=1)
        predictions_softmax = F.softmax(predictions, dim=1)
        acc = self.accuracy(predictions_softmax, labels)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)
        return loss
    def test_step(self, batch, batch_idx):
        images, labels = batch
        predictions = self(images)  # Forward pass
        loss = self.criterion(predictions, labels)  # Compute the loss
        predicted_classes = torch.argmax(F.softmax(predictions, dim=1), dim=1)
        predictions_softmax = F.softmax(predictions, dim=1)
        acc = self.accuracy(predictions_softmax, labels)
        real_step_acc = (labels == predicted_classes).sum() / self.batch_size
        self.log('test_loss', loss, prog_bar=True)
        self.log('real_test_acc', real_step_acc, prog_bar=True)
        self.log('test_acc', acc, prog_bar=True)
        return loss
    def configure_optimizers(self):
        optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate, momentum=0.9)
        return optimizer
    def train_dataloader(self):
        # Set up and return the training DataLoader
        filepath_train = "dataset/test/"
        train_dataset = datasets.ImageFolder(root=filepath_train, transform=self.transform)
        train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=False, num_workers=16)
        return train_loader
    def test_dataloader(self):
        # Set up and return the test DataLoader
        filepath_test = "dataset/test/"
        test_dataset = datasets.ImageFolder(root=filepath_test, transform=self.transform)
        test_loader = DataLoader(test_dataset, batch_size=self.batch_size, shuffle=True, num_workers=16)
        return test_loader
    def val_dataloader(self):
        # Set up and return the validation DataLoader
        filepath_val = "dataset/val/"
        val_dataset = datasets.ImageFolder(root=filepath_val, transform=self.test_transform)
        val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False, num_workers=16)
        return val_loader
The output looks like this:
train_acc_epoch 0.7635096907615662
real_test_acc 0.7901701927185059
test_acc 0.39825108647346497
I compute the "real" test accuracy like this:
predicted_classes = torch.argmax(F.softmax(predictions, dim=1), dim=1)
predictions_softmax = F.softmax(predictions, dim=1)
acc = self.accuracy(predictions_softmax, labels)
real_step_acc = (labels == predicted_classes).sum() / self.batch_size
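(One aside on this manual check: dividing by self.batch_size slightly deflates the score whenever the final batch holds fewer than batch_size samples; dividing by the actual batch length avoids that. A minimal standalone sketch with made-up numbers:)
import torch

# Hypothetical final batch of 5 samples when batch_size is 64
labels = torch.tensor([0, 1, 1, 0, 1])
predicted_classes = torch.tensor([0, 1, 0, 0, 1])

acc_fixed_divisor = (labels == predicted_classes).sum() / 64     # 4/64 ~= 0.06, deflated
acc_true = (labels == predicted_classes).float().mean()          # 4/5 = 0.80
print(acc_fixed_divisor.item(), acc_true.item())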
So the problem is: when I run the test, the test accuracy reported in test_step is 40%, while the test accuracy I compute myself is 80-85%. Here is what I tried: when I enable shuffling on the test data (I know it's bad practice, but it was part of debugging), torchmetrics.Accuracy becomes correct! It reports 80-85% accuracy.
So why does shuffling change anything? Please help me out; I think this might even be some kind of bug.
I found the answer. The problem was that I was logging the tensor object holding the accuracy value. Since it is a multiclass macro accuracy computed per batch, a batch of unshuffled data (e.g. 64 cats) yields 0% accuracy for the dog class in that batch, and those per-batch values are then averaged, which is how I ended up at 40%. To fix it, I pass the actual metric object to self.log instead, so Lightning accumulates per-class state over the whole run and reports the accuracy correctly. I hope this helps someone one day!
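To see the failure mode concretely, here is a minimal sketch (assuming the same 2-class setup as above; the exact per-batch value may vary across torchmetrics versions, but on versions behaving as described in this post, absent classes score 0 in the macro average):
import torch
import torchmetrics

metric = torchmetrics.Accuracy(task='multiclass', num_classes=2, average='macro')

# Unshuffled data: one batch of only cats (class 0), one of only dogs (class 1),
# both predicted perfectly.
cat_labels, cat_preds = torch.zeros(64, dtype=torch.long), torch.zeros(64, dtype=torch.long)
dog_labels, dog_preds = torch.ones(64, dtype=torch.long), torch.ones(64, dtype=torch.long)

# Per-batch value: the absent dog class contributes 0%, so macro accuracy
# comes out around 0.5 despite every prediction being correct.
print(metric(cat_preds, cat_labels))

# Accumulating state across batches and computing once pools the per-class
# counts first, so the final value is correct (1.0 here).
metric.reset()
metric.update(cat_preds, cat_labels)
metric.update(dog_preds, dog_labels)
print(metric.compute())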
Corrected code:
self.accuracy(predictions_softmax, labels)
self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
self.log('train_acc', self.accuracy, on_step=True, on_epoch=True, prog_bar=True)
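For completeness, a sketch of a full test_step wired up this way (same names as in the code above; the test_loss/test_acc keys mirror my original logging):
def test_step(self, batch, batch_idx):
    images, labels = batch
    predictions = self(images)
    loss = self.criterion(predictions, labels)
    # Update the metric state via its forward call, but do NOT log the
    # returned per-batch tensor.
    self.accuracy(F.softmax(predictions, dim=1), labels)
    self.log('test_loss', loss, prog_bar=True)
    # Logging the Metric object itself lets Lightning call compute() over
    # the accumulated state at epoch end.
    self.log('test_acc', self.accuracy, on_step=True, on_epoch=True, prog_bar=True)
    return loss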