(Edit: added the full code) I am trying to run the following code:
import os
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
from PIL import Image
from IPython.display import display
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torch.optim.lr_scheduler import StepLR
from torchinfo import summary
from tqdm import tqdm
data_path = 'img/chest_xray' # https://www.kaggle.com/datasets/tolgadincer/labeled-chest-xray-images
train_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ColorJitter(brightness=0.10, contrast=0.1, saturation=0.10, hue=0.1),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])])
test_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])])
train_data = datasets.ImageFolder(
    os.path.join(data_path, 'train'), transform=train_transform
)
test_data = datasets.ImageFolder(
    os.path.join(data_path, 'test'), transform=test_transform
)
train_loader = DataLoader(train_data, batch_size=16, shuffle=True, pin_memory=True)
test_loader = DataLoader(test_data, batch_size=1, shuffle=False, pin_memory=True)
class_names = train_data.classes
print(class_names)
print(f'Number of train images: {len(train_data)}')
print(f'Number of test images: {len(test_data)}')
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # INPUT BLOCK
        self.convblock1 = nn.Sequential(
            nn.Conv2d(
                in_channels=3, out_channels=8, kernel_size=(3,3), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(4),
        )
        self.pool11 = nn.MaxPool2d(2,2)
        # CONVOLUTION BLOCK
        self.convblock2 = nn.Sequential(
            nn.Conv2d(
                in_channels=8, out_channels=16, kernel_size=(3,3), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
        )
        self.pool22 = nn.MaxPool2d(2,2)
        # TRANSITION BLOCK
        self.convblock3 = nn.Sequential(
            nn.Conv2d(
                in_channels=16, out_channels=10, kernel_size=(1,1), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(10),
        )
        self.pool33 = nn.MaxPool2d(2,2)
        # CONVOLUTION BLOCK
        self.convblock4 = nn.Sequential(
            nn.Conv2d(
                in_channels=10, out_channels=10, kernel_size=(3,3), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(10),
        )
        self.convblock5 = nn.Sequential(
            nn.Conv2d(
                in_channels=10, out_channels=32, kernel_size=(1,1), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(32),
        )
        self.convblock6 = nn.Sequential(
            nn.Conv2d(
                in_channels=32, out_channels=10, kernel_size=(1,1), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(10),
        )
        self.convblock7 = nn.Sequential(
            nn.Conv2d(
                in_channels=10, out_channels=10, kernel_size=(3,3), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(10),
        )
        self.convblock8 = nn.Sequential(
            nn.Conv2d(
                in_channels=10, out_channels=32, kernel_size=(1,1), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(32),
        )
        self.convblock9 = nn.Sequential(
            nn.Conv2d(
                in_channels=32, out_channels=10, kernel_size=(1,1), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(10),
        )
        self.convblock10 = nn.Sequential(
            nn.Conv2d(
                in_channels=10, out_channels=14, kernel_size=(3,3), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(14),
        )
        self.convblock11 = nn.Sequential(
            nn.Conv2d(
                in_channels=14, out_channels=16, kernel_size=(3,3), padding=0, bias=False),
            nn.ReLU(),
            nn.BatchNorm2d(16),
        )
        # OUTPUT BLOCK
        self.gap = nn.Sequential(
            nn.AvgPool2d(kernel_size=4)
        )
        self.convblockout = nn.Sequential(
            nn.Conv2d(
                in_channels=16, out_channels=2, kernel_size=(4,4), padding=0, bias=False),
        )

    def forward(self, x):
        x = self.convblock1(x)
        x = self.pool11(x)
        x = self.convblock2(x)
        x = self.pool22(x)
        x = self.convblock3(x)
        x = self.pool33(x)
        x = self.convblock4(x)
        x = self.convblock5(x)
        x = self.convblock6(x)
        x = self.convblock7(x)
        x = self.convblock8(x)
        x = self.convblock9(x)
        x = self.convblock10(x)
        x = self.convblock11(x)
        x = self.gap(x)
        x = self.convblockout(x)
        x = x.view(-1, 2)
        return F.log_softmax(x, dim=-1)
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
print(f'Available processor: {device}')
model = Net().to(device)
summary(model, input_size=(3,224,224), col_names=('output_size', 'num_params'))
train_losses = []
test_losses = []
train_acc = []
test_acc = []
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    for batch_idx, (data, target) in enumerate(pbar):
        # Get data.
        data, target = data.to(device), target.to(device)
        # Initialization of gradient.
        optimizer.zero_grad()
        # Prediction on data.
        y_pred = model(data)
        # Calculate the loss given the prediction (cost function).
        loss = F.nll_loss(y_pred, target)
        train_losses.append(loss)
        # Backpropagation.
        loss.backward()
        optimizer.step()
        # Track the training loss and accuracy.
        pred = y_pred.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)
        pbar.set_description(
            desc=f'Loss={loss.item()} '
                 f'Batch_id={batch_idx} '
                 f'Accuracy={100*correct/processed:.2f}'
        )
        train_acc.append(100*correct/processed)
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)
    print(
        f'\nTest set: '
        f'Average loss: {test_loss:.4f}, '
        f'Accuracy: {correct}/{len(test_loader.dataset)} '
        f'({100. * correct / len(test_loader.dataset):.2f}%)\n'
    )
    test_acc.append(100.*correct/len(test_loader.dataset))
model = Net().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = StepLR(optimizer, step_size=6, gamma=0.5)
EPOCHS = 15
for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    train(model, device, train_loader, optimizer, epoch)
    scheduler.step()
    print('current Learning Rate: ', optimizer.state_dict()['param_groups'][0]['lr'])
    test(model, device, test_loader)
My first run succeeded because nn.BatchNorm2d() was disabled. Now that I have enabled it, I get the following errors: Failed to run torchinfo. See above stack traces for more details. Executed layers up to: [Conv2d: 2, ReLU: 2] and ValueError: expected 4D input (got 3D input).
As I understand it, during transforms.ToTensor() the input image (3 channels, 224 H, 224 W) is converted to a PyTorch tensor. nn.BatchNorm2d() is trying to normalize a 4D tensor but is receiving a 3D one. How would I change the transform so that the tensor is 4D (batched)? I have read about .unsqueeze, but all the examples operate on a single tensor.
The model expects a batch of images as a 4D tensor of size (batch_size, channels, height, width). If you pass a single image, you need to add a batch dimension, which you can do with x.unsqueeze(0) or x[None].
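For illustration, a minimal sketch of what that looks like for a single image pushed through the test_transform defined in your code (the file name here is hypothetical):

img = Image.open('img/chest_xray/test/NORMAL/example.jpeg').convert('RGB')  # hypothetical file name
x = test_transform(img)   # a single image -> 3D tensor, torch.Size([3, 224, 224])
x = x.unsqueeze(0)        # add a leading batch dimension -> torch.Size([1, 3, 224, 224])
# x[None] produces the same result as x.unsqueeze(0)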
That said, you should check what is happening in your data loader and find out why it is producing a tensor of the wrong shape.
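For example, you could print the shape of one batch straight from your loader, and, since the cited traceback comes from torchinfo, also try giving summary an input_size that includes the batch dimension (a sketch against the variables defined above; the exact input_size convention is worth double-checking against your torchinfo version):

images, labels = next(iter(train_loader))
print(images.shape)  # a DataLoader batch should already be 4D, e.g. torch.Size([16, 3, 224, 224])

summary(model, input_size=(1, 3, 224, 224), col_names=('output_size', 'num_params'))  # batch dimension included, so BatchNorm2d sees a 4D tensor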