Python 3.9.5, torch 1.13.0+cu117, torchvision 0.14.0+cu117
I am currently training a convolutional neural network (CNN) for an image classification task on the MNIST dataset. During training, the test accuracy consistently exceeds the training accuracy, which is the opposite of what I expected. Here are my training results:
epoch=1, train loss=0.8197974562644958, train acc=0.7494, test loss=0.1455492526292801, test acc=0.9616
epoch=2, train loss=0.7107925415039062, train acc=0.7788333333333334, test loss=0.1208220049738884, test acc=0.9689
epoch=3, train loss=0.6579669713973999, train acc=0.7906666666666666, test loss=0.11497163027524948, test acc=0.9676
epoch=4, train loss=0.6305248141288757, train acc=0.7994333333333333, test loss=0.10593992471694946, test acc=0.97
epoch=5, train loss=0.5982099771499634, train acc=0.80585, test loss=0.09132635593414307, test acc=0.9714
epoch=6, train loss=0.5825754404067993, train acc=0.8125333333333333, test loss=0.09170813113451004, test acc=0.9723
epoch=7, train loss=0.5688086748123169, train acc=0.8155166666666667, test loss=0.08628570288419724, test acc=0.9737
epoch=8, train loss=0.5556393265724182, train acc=0.8193166666666667, test loss=0.08203426003456116, test acc=0.9762
epoch=9, train loss=0.546567976474762, train acc=0.8213833333333334, test loss=0.08405696600675583, test acc=0.9754
epoch=10, train loss=0.5374698638916016, train acc=0.8239333333333333, test loss=0.07133891433477402, test acc=0.9788
epoch=11, train loss=0.5179286599159241, train acc=0.82975, test loss=0.0744888037443161, test acc=0.9792
epoch=12, train loss=0.5131004452705383, train acc=0.8329, test loss=0.07630482316017151, test acc=0.9778
epoch=14, train loss=0.49787914752960205, train acc=0.8366666666666667, test loss=0.07209591567516327, test acc=0.9779
epoch=15, train loss=0.4968840777873993, train acc=0.83475, test loss=0.07035819441080093, test acc=0.9801
epoch=16, train loss=0.4877821207046509, train acc=0.83925, test loss=0.07009950280189514, test acc=0.9777
epoch=17, train loss=0.48330068588256836, train acc=0.84045, test loss=0.06527410447597504, test acc=0.9809
epoch=18, train loss=0.48005640506744385, train acc=0.8404166666666667, test loss=0.06624794006347656, test acc=0.9781
epoch=19, train loss=0.47614845633506775, train acc=0.8418833333333333, test loss=0.07185563445091248, test acc=0.9788
Training code:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from pathlib import Path
from CNN import CNNmodel

SEED = 5
device = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 16
data_root = Path("data/")
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
# TrivialAugmentWide is applied to the training set only;
# the test set is converted to tensors without augmentation.
train_transform = transforms.Compose([
    transforms.TrivialAugmentWide(num_magnitude_bins=8),
    transforms.ToTensor()
])
test_transform = transforms.ToTensor()
train_data = datasets.MNIST(
    root=data_root / "train",
    train=True,
    download=True,
    transform=train_transform
)
test_data = datasets.MNIST(
    root=data_root / "test",
    train=False,
    download=True,
    transform=test_transform
)
train_dataloader = DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=True
)
test_dataloader = DataLoader(
    test_data,
    batch_size=BATCH_SIZE,
    shuffle=False
)
channel_num = train_data[0][0].shape[0]
model = CNNmodel(in_shape=channel_num, hidden_shape=8, out_shape=len(train_data.classes)).to(device)
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()
epochs = 20
def train_step(dataloader, loss_fn, optimizer, model, device):
    model.train()
    train_loss = 0
    train_acc = 0
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        # Accumulate the scalar value, not the tensor, so the
        # computation graph is not kept alive across batches.
        train_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
        train_acc += (y_pred_class == y).sum().item() / len(y_pred)
    train_loss /= len(dataloader)
    train_acc /= len(dataloader)
    return (train_loss, train_acc)
def test_step(dataloader, loss_fn, model, device):
    model.eval()
    test_loss = 0
    test_acc = 0
    with torch.inference_mode():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            loss = loss_fn(y_pred, y)
            test_loss += loss.item()
            y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
            test_acc += (y_pred_class == y).sum().item() / len(y_pred)
    test_loss /= len(dataloader)
    test_acc /= len(dataloader)
    return (test_loss, test_acc)
for epoch in range(epochs):
    train_loss, train_acc = train_step(
        dataloader=train_dataloader,
        loss_fn=loss_fn,
        optimizer=optimizer,
        model=model,
        device=device
    )
    test_loss, test_acc = test_step(
        dataloader=test_dataloader,
        loss_fn=loss_fn,
        model=model,
        device=device
    )
    torch.cuda.empty_cache()
    print(f"epoch={epoch + 1}, train loss={train_loss}, train acc={train_acc}, test loss={test_loss}, test acc={test_acc}\n")
Here is my model architecture:
import torch
from torch import nn

class CNNmodel(nn.Module):
    def __init__(self, in_shape, hidden_shape, out_shape) -> None:
        super().__init__()
        self.conv_block_1 = nn.Sequential(
            nn.Conv2d(
                in_channels=in_shape,
                out_channels=hidden_shape,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=hidden_shape,
                out_channels=hidden_shape,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.conv_block_2 = nn.Sequential(
            nn.Conv2d(
                in_channels=hidden_shape,
                out_channels=hidden_shape,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=hidden_shape,
                out_channels=hidden_shape,
                kernel_size=3,
                stride=1,
                padding=1
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            # Two 2x2 max-pools reduce 28x28 MNIST images to 7x7.
            nn.Linear(in_features=hidden_shape * 7 * 7,
                      out_features=out_shape)
        )

    def forward(self, x):
        return self.classifier(self.conv_block_2(self.conv_block_1(x)))
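To double-check the hidden_shape * 7 * 7 flatten size, a quick shape test can be run (a sketch, assuming single-channel 28x28 MNIST inputs and the constructor arguments used above):

import torch

# One fake single-channel 28x28 image through an untrained model.
check_model = CNNmodel(in_shape=1, hidden_shape=8, out_shape=10)
dummy = torch.randn(1, 1, 28, 28)
print(check_model(dummy).shape)  # expected: torch.Size([1, 10])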
I thought the problem might be in how the dataset is set up, but I could not find anything wrong.
I solved this by setting the batch size to 32.
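For reference, the change amounts to the following (a sketch, assuming the rest of the script above stays unchanged):

BATCH_SIZE = 32  # was 16; with 60000 training images this halves the optimizer steps per epoch (3750 -> 1875)

train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)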