I am using a residual neural network for a classification task. Somehow, adding or omitting a ReLU activation makes autograd fail. I would appreciate any insight into why this happens; I can't make sense of it. ReLU isn't an in-place operation, is it? The error message:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation
Here is the network architecture. The third-to-last line is what causes the problem when it is not commented out.
class ResidualBlock(nn.Module):
    def __init__(self, num_filters, kernel_size):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv1d(num_filters, num_filters, kernel_size=kernel_size, padding='same')
        self.bn1 = nn.BatchNorm1d(num_filters)
        self.conv2 = nn.Conv1d(num_filters, num_filters, kernel_size=kernel_size, padding='same')
        self.bn2 = nn.BatchNorm1d(num_filters)

    def forward(self, x):
        shortcut = x
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = F.relu(out)  # causes the issue when not commented out
        out += shortcut
        return out
Below is a minimal working example. I am using Python 3.12 and torch 2.5.1.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
# Define the ResidualBlock
class ResidualBlock(nn.Module):
    def __init__(self, num_filters, kernel_size):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv1d(num_filters, num_filters, kernel_size=kernel_size, padding='same')
        self.bn1 = nn.BatchNorm1d(num_filters)
        self.conv2 = nn.Conv1d(num_filters, num_filters, kernel_size=kernel_size, padding='same')
        self.bn2 = nn.BatchNorm1d(num_filters)

    def forward(self, x):
        shortcut = x
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = F.relu(out)  # causes the issue
        out += shortcut
        return out
class SimpleModel(nn.Module):
    def __init__(self, num_filters, kernel_size):
        super(SimpleModel, self).__init__()
        self.res_block = ResidualBlock(num_filters, kernel_size)
        self.fc = nn.Linear(num_filters, 1)

    def forward(self, x):
        x = self.res_block(x)
        x = x.mean(dim=2)
        x = self.fc(x)
        return x
torch.manual_seed(42)
num_samples = 1000
sequence_length = 32
num_filters = 16
X = torch.randn(num_samples, num_filters, sequence_length) # Random input
y = torch.sum(X, dim=(1, 2)).unsqueeze(1)  # Simple target (sum of all values), shape (num_samples, 1) to match the model output
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
model = SimpleModel(num_filters=num_filters, kernel_size=3)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 5
for epoch in range(epochs):
    model.train()
    epoch_loss = 0.0
    for batch_X, batch_y in dataloader:
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(dataloader):.4f}")
print("Training complete!")
The in-place operation is this one:
out += shortcut
relu needs its own output to compute its gradient! So you are performing an in-place operation on the output of relu, which the backward pass still needs in order to compute that gradient.
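A minimal sketch of one way to fix it, assuming you want to keep the second activation: make the addition out-of-place so the relu output is left untouched for backward:

    def forward(self, x):
        shortcut = x
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = F.relu(out)
        out = out + shortcut  # out-of-place add: creates a new tensor, relu's saved output stays intact
        return out

Alternatively, add the shortcut first and apply the activation afterwards (the original ResNet ordering); the in-place add then only touches bn2's output, which is fine, just as your code already works when the relu line is commented out. If you need to track down which in-place op is responsible in a larger model, torch.autograd.set_detect_anomaly(True) makes the error point at the offending line of the forward pass.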