我是PyTorch的新手,一直在用CIFAR10跟着做一些教程,主要是在谷歌Colab上进行,因为我自己还没有可用于实验的GPU。
我已成功训练了我的神经网络,但我不确定我的代码是否真的用上了Colab的GPU,因为在Colab上的训练时间并不比在我的2014款MacBook Pro(没有GPU)上快多少。
我检查过,我的notebook确实运行在Tesla K80上,但不知为何训练速度很慢。所以我猜也许是我的代码缺少某些使用GPU所需的写法,但我找不出是哪一部分。
# install PyTorch
# Notebook-only setup cell: the `!pip` line below is IPython/Colab shell
# syntax, so this section does not run as a plain Python script.
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
# Assemble the platform part of the wheel file name from the three tag helpers.
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
# Select the 'cu80' wheel (presumably the CUDA 8.0 build) when
# /opt/bin/nvidia-smi exists, otherwise the 'cpu' wheel.
accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'
# Install the pinned torch 0.4.0 wheel for that accelerator/platform, plus torchvision.
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.0-{platform}-linux_x86_64.whl torchvision
import torch
import torch.nn as nn
from torch.optim import Adam
from torchvision import transforms
from torch.autograd import Variable
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
# Prefer the first CUDA GPU when PyTorch can see one; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
# Echo the choice so the notebook output shows where tensors will live.
print(device)
# hyperparameters
n_epochs = 50             # number of passes over the training loader
n_batch_size = 200        # batch size used by both data loaders
n_display_step = 200      # print progress every this many training steps
n_learning_rate = 0.001   # learning rate handed to the Adam optimizer
n_download_cifar = True   # forwarded as `download=` to the CIFAR10 training set
# CIFAR-10 datasets
# dataset description: https://www.cs.toronto.edu/~kriz/cifar.html
transform = transforms.Compose([
    transforms.ToTensor(),
    # Normalize each of the three channels with mean 0.5 and std 0.5.
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])
# Both splits share the same root directory and transform; only the training
# split is asked to download.
train_dataset = datasets.CIFAR10(
    root="../datasets/cifar",
    train=True,
    transform=transform,
    download=n_download_cifar,
)
test_dataset = datasets.CIFAR10(
    root="../datasets/cifar",
    train=False,
    transform=transform,
)
# Data loaders: the training loader shuffles and uses two worker processes;
# the test loader keeps dataset order and the default worker setting.
train_loader = DataLoader(
    train_dataset,
    batch_size=n_batch_size,
    shuffle=True,
    num_workers=2,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=n_batch_size,
    shuffle=False,
)
# build CNN
class CNN(nn.Module):
    """Small two-stage convolutional classifier for 3x32x32 images.

    Each stage is a 5x5 convolution (stride 1, padding 2, so height and
    width are preserved) followed by ReLU and a 2x2 max-pool that halves
    height and width. The flattened features feed a single linear layer
    that produces 10 output scores; no softmax is applied in this module.
    """

    def __init__(self):
        super(CNN, self).__init__()
        # input: (3, 32, 32)
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2, 2))
        # after conv1: (32, 16, 16)
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 16, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(2, 2))
        # after conv2: (16, 8, 8)
        self.out = nn.Linear(16 * 8 * 8, 10)

    def forward(self, x):
        """Return scores of shape (batch, 10) for ``x`` of shape (batch, 3, 32, 32)."""
        x = self.conv1(x)
        x = self.conv2(x)
        # Flatten everything except the batch dimension for the linear layer.
        x = x.view(x.size(0), -1)
        out = self.out(x)
        return out
# Build the network and place it on the selected device; Module.to() moves
# the parameters in place and returns the module itself.
net = CNN().to(device)
# Cross-entropy loss and an Adam optimizer over all network parameters.
criterion = nn.CrossEntropyLoss()
optimizer = Adam(params=net.parameters(), lr=n_learning_rate)
def get_accuracy(model, loader):
    """Return the fraction of samples in ``loader`` that ``model`` labels correctly.

    The model is switched to evaluation mode and left in that mode; a caller
    that continues training must call ``model.train()`` again.

    Args:
        model: module mapping an input batch to per-class scores of shape
            (batch, n_classes).
        loader: iterable yielding ``(inputs, labels)`` tensor pairs.

    Returns:
        Accuracy as a float in [0, 1]; 0.0 when ``loader`` yields no samples.
    """
    model.eval()
    # Evaluate on whatever device the model's weights live on, so inputs and
    # weights always match; a parameter-free model falls back to the CPU.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device('cpu')
    n_samples = 0
    n_correct = 0
    with torch.no_grad():
        for x, y in loader:
            # Tensors are moved directly: the Variable wrapper is a
            # deprecated no-op since PyTorch 0.4.
            x, y = x.to(target), y.to(target)
            out = model(x)
            # Predicted class = index of the largest score along dim 1.
            _, pred = torch.max(out, 1)
            n_samples += y.size(0)
            n_correct += (pred == y).sum().item()
    if n_samples == 0:
        # Avoid ZeroDivisionError on an empty loader.
        return 0.0
    return n_correct / n_samples
def train(model, criterion, optimizer, epochs, train_loader, test_loader):
    """Train ``model`` for ``epochs`` passes over ``train_loader``, printing progress.

    Whenever the step index within an epoch is a multiple of the module-level
    ``n_display_step``, the current batch loss and the accuracy on both
    loaders are printed. Each such report iterates over ``train_loader`` and
    ``test_loader`` in full via ``get_accuracy``.

    Args:
        model: module to optimize; batches are moved to the device of its
            parameters.
        criterion: callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: optimizer already bound to ``model``'s parameters.
        epochs: number of passes over ``train_loader``.
        train_loader: iterable yielding ``(inputs, labels)`` training batches.
        test_loader: iterable yielding ``(inputs, labels)`` batches used only
            for the reported test accuracy.

    Returns:
        None.
    """
    # Move batches to wherever the model's weights live; a parameter-free
    # model falls back to the CPU.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device('cpu')
    for epoch in range(epochs):
        for step, (x, y) in enumerate(train_loader):
            # get_accuracy() leaves the model in eval mode, so training mode
            # is re-enabled at the start of every step.
            model.train()
            # Tensors are moved directly: the Variable wrapper is a
            # deprecated no-op since PyTorch 0.4.
            x, y = x.to(target), y.to(target)
            out = model(x)
            loss = criterion(out, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step % n_display_step == 0:
                print("Epoch {:2d} Loss {:.4f} Accuracy (Train | Test) {:.4f} {:.4f}".format(
                    epoch,
                    loss.item(),
                    get_accuracy(model, train_loader),
                    get_accuracy(model, test_loader)))
# Start training with the network, loss, optimizer and loaders defined above.
train(
    model=net,
    criterion=criterion,
    optimizer=optimizer,
    epochs=n_epochs,
    train_loader=train_loader,
    test_loader=test_loader,
)
您的代码看起来没有问题。我分别在MacBook、一台带GPU的机器和Google Colab上运行了它,并比较了训练所花的时间;我的实验表明您的代码已经正确适配了GPU。
您可以尝试运行这个帖子(this thread)中的代码,查看Google为您分配了多少GPU内存吗?我猜您只被分配到了5%的GPU使用率。
问候,
雷克斯。