我在 PyTorch 中编写了一些模型,但即使训练了很多轮,它们也学不到任何东西。为了调试这个问题,我构建了一个简单的模型,用来拟合输入的恒等函数。困难在于,尽管训练了 50k 个 epoch,这个模型仍然没有学到任何东西。代码如下:
import torch
import torch.nn as nn
torch.manual_seed(1)
class Net(nn.Module):
    """Small fully connected classifier (2 -> 4 -> 4 -> 2) returning probabilities.

    Each of the first two linear layers is followed by dropout (p=0.5) and
    then ReLU; the output layer is followed by a softmax over dim 1.

    NOTE(review): only the lost indentation has been restored here. The
    softmax inside ``forward`` and the dropout rate are intentionally left
    unchanged so that the posted behaviour is reproduced.
    """

    def __init__(self):
        super().__init__()
        self.input = nn.Linear(2, 4)
        self.hidden = nn.Linear(4, 4)
        self.output = nn.Linear(4, 2)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Map a float tensor with 2 features per row to softmax probabilities.

        Dropout is active whenever the module is in training mode, so
        repeated calls on the same input can give different results.
        """
        x = self.input(x)
        x = self.dropout(x)
        x = self.relu(x)
        x = self.hidden(x)
        x = self.dropout(x)
        x = self.relu(x)
        x = self.output(x)
        # Normalises each row (dim 1) so that it sums to 1.
        x = self.softmax(x)
        return x
# Four one-hot rows. The index of the 1 in each row (its argmax) is used as
# the class label below, so the model is asked to reproduce its input's class.
X = torch.tensor([[1, 0], [1, 0], [0, 1], [0, 1]], dtype=torch.float)
net = Net()
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(net.parameters(), lr=0.001)

for i in range(100000):
    opt.zero_grad()
    y = net(X)
    loss = criterion(y, torch.argmax(X, dim=1))
    loss.backward()
    # NOTE(review): the loop as posted never calls opt.step(); it is left
    # out here on purpose so the script reproduces the reported behaviour.
    if i % 500 == 0:
        # Report progress every 500 iterations: predicted classes and loss.
        print("Epoch: ", i)
        print(torch.argmax(y, dim=1).detach().numpy().tolist())
        print("Loss: ", loss.item())
        print()
输出
Epoch: 52500
[0, 0, 1, 0]
Loss: 0.6554909944534302
Epoch: 53000
[0, 0, 0, 0]
Loss: 0.7004914283752441
Epoch: 53500
[0, 0, 0, 0]
Loss: 0.7156486511230469
Epoch: 54000
[0, 0, 0, 0]
Loss: 0.7171240448951721
Epoch: 54500
[0, 0, 0, 0]
Loss: 0.691678524017334
Epoch: 55000
[0, 0, 0, 0]
Loss: 0.7301554679870605
Epoch: 55500
[0, 0, 0, 0]
Loss: 0.728650689125061
我的实现有什么问题?
有几个错误:
1. 缺少 optimizer.step():optimizer.step() 会根据反向传播得到的梯度以及累积的动量等信息来更新参数。
2. 将 softmax 与 CrossEntropy 损失一起使用:CrossEntropyLoss 准则将 nn.LogSoftmax() 和 nn.NLLLoss() 组合在一个类中,即它先应用 softmax,然后取负对数。因此,在您的情况下,实际计算的是 softmax(softmax(output))。正确的做法是:训练时使用线性输出层,预测时再使用 softmax 层,或者直接取 argmax。
3. 对这么小的网络使用了过高的 dropout 值:这会导致欠拟合。
这是更正后的代码:
import torch
import torch.nn as nn
torch.manual_seed(1)
class Net(nn.Module):
    """Small fully connected classifier (2 -> 4 -> 4 -> 2) that returns logits.

    ``forward`` returns the raw output of the last linear layer, which is
    the input that ``nn.CrossEntropyLoss`` expects. Use ``predict`` to get
    probabilities.
    """

    def __init__(self):
        super().__init__()
        self.input = nn.Linear(2, 4)
        self.hidden = nn.Linear(4, 4)
        self.output = nn.Linear(4, 2)
        self.relu = nn.ReLU()
        # Only used by predict(); deliberately not part of forward().
        self.softmax = nn.Softmax(dim=1)
        # No dropout layer: dropping half of the units of a network this
        # small leads to underfitting.

    def forward(self, x):
        """Return unnormalised class scores (logits) for each row of ``x``."""
        x = self.relu(self.input(x))
        x = self.relu(self.hidden(x))
        # No softmax here: CrossEntropyLoss applies log-softmax itself, so
        # adding one would normalise the scores twice.
        return self.output(x)

    def predict(self, x):
        """Return per-row class probabilities, computed without tracking gradients."""
        with torch.no_grad():
            # Call the module rather than forward() directly so that any
            # registered hooks still run.
            return self.softmax(self(x))
# Four one-hot rows; the index of the 1 in each row is its class label.
X = torch.tensor([[1, 0], [1, 0], [0, 1], [0, 1]], dtype=torch.float)
# The labels never change, so derive them once instead of on every iteration.
targets = torch.argmax(X, dim=1)
net = Net()
criterion = nn.CrossEntropyLoss()
opt = torch.optim.Adam(net.parameters(), lr=0.001)

for i in range(100000):
    opt.zero_grad()
    y = net(X)
    loss = criterion(y, targets)
    loss.backward()
    # Apply the gradients computed by backward(); without this call the
    # parameters are never updated.
    opt.step()
    if i % 500 == 0:
        # Report progress every 500 iterations. The prediction is made after
        # this iteration's update, while the loss is the value from before it.
        print("Epoch: ", i)
        pred = net.predict(X)
        print(f'prediction: {torch.argmax(pred, dim=1).tolist()}, actual: {targets}')
        print("Loss: ", loss.item())
输出:
Epoch: 0
prediction: [0, 0, 0, 0], actual: tensor([0, 0, 1, 1])
Loss: 0.7042869329452515
Epoch: 500
prediction: [0, 0, 1, 1], actual: tensor([0, 0, 1, 1])
Loss: 0.1166711300611496
Epoch: 1000
prediction: [0, 0, 1, 1], actual: tensor([0, 0, 1, 1])
Loss: 0.05215628445148468
Epoch: 1500
prediction: [0, 0, 1, 1], actual: tensor([0, 0, 1, 1])
Loss: 0.02993333339691162
Epoch: 2000
prediction: [0, 0, 1, 1], actual: tensor([0, 0, 1, 1])
Loss: 0.01916157826781273
Epoch: 2500
prediction: [0, 0, 1, 1], actual: tensor([0, 0, 1, 1])
Loss: 0.01306679006665945
Epoch: 3000
prediction: [0, 0, 1, 1], actual: tensor([0, 0, 1, 1])
Loss: 0.009280549362301826
.
.
.