我正在编写代码示例以在PyTorch中进行简单的线性投影(例如PCA)。除了损失不会改变,随着训练的进行,一切似乎都正常。改变学习率不会对此产生影响,这是一个简单的一维问题,因此损失肯定会在改变。我在这里想念什么?
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as nnF
class PCArot2D(nn.Module):
"2D PCA rotation, expressed as a gradient-descent problem"
def __init__(self):
super(PCArot2D, self).__init__()
self.theta = nn.Parameter(torch.tensor(np.random.random() * 2 * np.pi))
def getrotation(self):
sintheta = torch.sin(self.theta)
costheta = torch.cos(self.theta)
return torch.tensor([[costheta, -sintheta], [sintheta, costheta]], requires_grad=True, dtype=torch.double)
def forward(self, x):
xmeans = torch.mean(x, dim=1, keepdim=True)
rot = self.getrotation()
return torch.mm(rot, x - xmeans)
def covariance(y):
"Calculates the covariance matrix of its input (as torch variables)"
ymeans = torch.mean(y, dim=1, keepdim=True)
ycentred = y - ymeans
return torch.mm(ycentred, ycentred.T) / ycentred.shape[1]
net = PCArot2D()
example2 = torch.tensor(np.random.randn(2, 33))
# define a loss function and an optimiser
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.1)
# train the network
num_epochs = 1000
for epoch in range(num_epochs):
optimizer.zero_grad()
# forward + backward + optimize
outputs = net(torch.DoubleTensor(example2))
# the covariance between output channels is the measure that we wish to minimise
covariance = (outputs[0, :] * outputs[1, :]).mean()
loss = criterion(covariance, torch.tensor(0, dtype=torch.double))
loss.backward()
optimizer.step()
running_loss = loss.item()
if ((epoch & (epoch - 1)) == 0) or epoch==(num_epochs-1): # don't print on all epochs
# print statistics
print('[%d] loss: %.8f' %
(epoch, running_loss))
print('Finished Training')
输出:
[0] loss: 0.00629047
[1] loss: 0.00629047
[2] loss: 0.00629047
[4] loss: 0.00629047
[8] loss: 0.00629047
etc
似乎问题出在您的getrotation
函数中。当从其他张量创建新的张量时,它不再具有后概率:
def getrotation(self):
sintheta = torch.sin(self.theta)
costheta = torch.cos(self.theta)
return torch.tensor([[costheta, -sintheta], [sintheta, costheta]], requires_grad=True, dtype=torch.double)
因此,您需要找到其他方法来构造返回张量。
这里有一个建议似乎可以使用torch.cat
:
def getrotation(self):
sintheta = torch.sin(self.theta)
costheta = torch.cos(self.theta)
#return torch.tensor([[costheta, -sintheta], [sintheta, costheta]], requires_grad=True, dtype=torch.double)
A = torch.cat([costheta.unsqueeze(0), -sintheta.unsqueeze(0)], dim=0)
B = torch.cat([sintheta.unsqueeze(0), costheta.unsqueeze(0)], dim=0)
return torch.cat([A.unsqueeze(0), B.unsqueeze(0)], dim=0).double()
实施此更改后,损失更改:
[0] loss: 0.00765365
[1] loss: 0.00764726
[2] loss: 0.00764023
[4] loss: 0.00762607
[8] loss: 0.00759777
[16] loss: 0.00754148
[32] loss: 0.00742997
[64] loss: 0.00721117
[128] loss: 0.00679025
[256] loss: 0.00601233
[512] loss: 0.00469085
[999] loss: 0.00288501
Finished Training
我希望这会有所帮助!
def getrotation(self):
sintheta = torch.sin(net.theta).double().unsqueeze(0)
costheta = torch.cos(net.theta).double().unsqueeze(0)
return torch.cat([costheta, -sintheta, sintheta, costheta]).reshape((2,2))