我正在尝试在 Pytorch 上使用 Mc Dropout 实现贝叶斯 CNN, 主要思想是,通过在测试时应用 dropout 并运行多次前向传递,您可以从各种不同的模型中获得预测。 我找到了 Mc Dropout 的一个应用,但我真的不明白他们是如何应用这个方法的,以及他们到底是如何从预测列表中选择正确的预测
def mcdropout_test(model):
model.train()
test_loss = 0
correct = 0
T = 100
for data, target in test_loader:
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data, volatile=True), Variable(target)
output_list = []
for i in xrange(T):
output_list.append(torch.unsqueeze(model(data), 0))
output_mean = torch.cat(output_list, 0).mean(0)
test_loss += F.nll_loss(F.log_softmax(output_mean), target, size_average=False).data[0] # sum up batch loss
pred = output_mean.data.max(1, keepdim=True)[1] # get the index of the max log-probability
correct += pred.eq(target.data.view_as(pred)).cpu().sum()
test_loss /= len(test_loader.dataset)
print('\nMC Dropout Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
train()
mcdropout_test()
我已经更换了
data, target = Variable(data, volatile=True), Variable(target)
通过添加
一开始with torch.no_grad():
这就是我定义 CNN 的方式
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(3, 192, 5, padding=2)
self.pool = nn.MaxPool2d(2, 2)
self.conv2 = nn.Conv2d(192, 192, 5, padding=2)
self.fc1 = nn.Linear(192 * 8 * 8, 1024)
self.fc2 = nn.Linear(1024, 256)
self.fc3 = nn.Linear(256, 10)
self.dropout = nn.Dropout(p=0.3)
nn.init.xavier_uniform_(self.conv1.weight)
nn.init.constant_(self.conv1.bias, 0.0)
nn.init.xavier_uniform_(self.conv2.weight)
nn.init.constant_(self.conv2.bias, 0.0)
nn.init.xavier_uniform_(self.fc1.weight)
nn.init.constant_(self.fc1.bias, 0.0)
nn.init.xavier_uniform_(self.fc2.weight)
nn.init.constant_(self.fc2.bias, 0.0)
nn.init.xavier_uniform_(self.fc3.weight)
nn.init.constant_(self.fc3.bias, 0.0)
def forward(self, x):
x = self.pool(F.relu(self.dropout(self.conv1(x)))) # recommended to add the relu
x = self.pool(F.relu(self.dropout(self.conv2(x)))) # recommended to add the relu
x = x.view(-1, 192 * 8 * 8)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(self.dropout(x)))
x = self.fc3(self.dropout(x)) # no activation function needed for the last layer
return x
任何人都可以帮助我在 CNN 上正确实现 Monte Carlo Dropout 方法吗?
在 Pytorch 中实现 MC Dropout 很容易。所需要做的就是将模型的 dropout 层设置为训练模式。这允许在不同的前向传递期间使用不同的漏失掩模。下面是 Pytorch 中 MC Dropout 的实现,说明了如何将来自各种前向传递的多个预测堆叠在一起并用于计算不同的不确定性指标。
import sys
import numpy as np
import torch
import torch.nn as nn
def enable_dropout(model):
""" Function to enable the dropout layers during test-time """
for m in model.modules():
if m.__class__.__name__.startswith('Dropout'):
m.train()
def get_monte_carlo_predictions(data_loader,
forward_passes,
model,
n_classes,
n_samples):
""" Function to get the monte-carlo samples and uncertainty estimates
through multiple forward passes
Parameters
----------
data_loader : object
data loader object from the data loader module
forward_passes : int
number of monte-carlo samples/forward passes
model : object
keras model
n_classes : int
number of classes in the dataset
n_samples : int
number of samples in the test set
"""
dropout_predictions = np.empty((0, n_samples, n_classes))
softmax = nn.Softmax(dim=1)
for i in range(forward_passes):
predictions = np.empty((0, n_classes))
model.eval()
enable_dropout(model)
for i, (image, label) in enumerate(data_loader):
image = image.to(torch.device('cuda'))
with torch.no_grad():
output = model(image)
output = softmax(output) # shape (n_samples, n_classes)
predictions = np.vstack((predictions, output.cpu().numpy()))
dropout_predictions = np.vstack((dropout_predictions,
predictions[np.newaxis, :, :]))
# dropout predictions - shape (forward_passes, n_samples, n_classes)
# Calculating mean across multiple MCD forward passes
mean = np.mean(dropout_predictions, axis=0) # shape (n_samples, n_classes)
# Calculating variance across multiple MCD forward passes
variance = np.var(dropout_predictions, axis=0) # shape (n_samples, n_classes)
epsilon = sys.float_info.min
# Calculating entropy across multiple MCD forward passes
entropy = -np.sum(mean * np.log(mean + epsilon), axis=-1) # shape (n_samples,)
# Calculating mutual information across multiple MCD forward passes
mutual_info = entropy - np.mean(np.sum(-dropout_predictions * np.log(dropout_predictions + epsilon),
axis=-1), axis=0) # shape (n_samples,)
继续讨论上面问题中发布的实现,通过首先将模型设置为训练模式(
model.train()
)来获得来自 T 个不同前向传递的多个预测。请注意,这是不可取的,因为如果模型中存在除 dropout 之外的其他层(例如批量范数),预测中将会引入不需要的随机性。因此,最好的方法是将 dropout 层设置为训练模式,如上面的代码片段所示。