I have the following network to perform binary classification of some trajectories:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class DCR(nn.Module):
    def __init__(self, kemb_size, nvar, points, device):
        super().__init__()
        self.phis = load_phis_dataset()
        self.kemb = get_kernel_embedding(self.phis, nvar, samples=kemb_size).to(device)  # (concepts, kemb_size)
        _ = self.kemb.requires_grad_()
        self.fc1 = nn.Linear(kemb_size + (nvar * points), 64)
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # concept truth degrees
        rhos = get_robustness(x, self.phis, time=False)  # (trajectories, concepts)
        _ = rhos.requires_grad_()
        # embed trajectories in kernel space
        traj_emb = torch.matmul(rhos, self.kemb)  # (trajectories, kemb_size)
        _ = traj_emb.requires_grad_()
        # combine info from traj_emb and x to predict class
        x_new = x.view(x.size(0), -1)  # flatten x
        combined_features = torch.cat((traj_emb, x_new), dim=1)  # (trajectories, kemb_size + nvar*points)
        output = self.fc1(combined_features)
        output = F.relu(output)
        output = self.fc2(output)
        output = self.sigmoid(output)
        return output.squeeze(1)
model = DCR(kemb_size, nvar, points, device).to(device)
criterion = nn.BCELoss().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01)

model.train()
for epoch in range(10):
    epoch_loss = 0.0
    for batch, labels in train_loader:
        batch, labels = batch.to(device), labels.to(device)
        y_preds = model(batch)
        loss = criterion(y_preds, labels.float())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += y_preds.shape[0] * loss.item()
    print(f'Epoch: {epoch}, Loss: {epoch_loss/len(y_train):.5f}')
However, the training loss stays exactly constant at every epoch and the weights never update. What might I be doing wrong?

I tried to fix the problem with a few .requires_grad_() calls, but without success. All shapes are given as comments in the code.
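To confirm that the weights really are frozen, it can help to inspect the gradients directly after one backward pass. A minimal check, reusing the model, criterion, optimizer, and train_loader defined above:

# Run a single batch and print the gradient norm of every registered parameter.
batch, labels = next(iter(train_loader))
batch, labels = batch.to(device), labels.to(device)

optimizer.zero_grad()
loss = criterion(model(batch), labels.float())
loss.backward()

for name, param in model.named_parameters():
    # param.grad is None if no gradient ever reached this parameter
    grad_norm = param.grad.norm().item() if param.grad is not None else float('nan')
    print(f'{name}: grad norm = {grad_norm:.6f}')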
Try this: I removed the `y_preds.shape[0] *` factor from the loss accumulation (and, per the code below, call `optimizer.zero_grad()` before the forward pass):
for epoch in range(10):
    epoch_loss = 0.0
    for batch, labels in train_loader:
        batch, labels = batch.to(device), labels.to(device)
        optimizer.zero_grad()
        y_preds = model(batch)
        loss = criterion(y_preds, labels.float())
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f'Epoch: {epoch}, Loss: {epoch_loss/len(y_train):.5f}')
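One more thing worth checking: calling `.requires_grad_()` on `self.kemb` does not make the optimizer update it, because `optim.SGD(model.parameters(), ...)` only sees tensors registered as `nn.Parameter`. If `kemb` is meant to be learned, a common pattern is to wrap it; a minimal sketch, assuming `get_kernel_embedding` returns a plain tensor:

import torch.nn as nn

class DCR(nn.Module):
    def __init__(self, kemb_size, nvar, points, device):
        super().__init__()
        self.phis = load_phis_dataset()
        # nn.Parameter registers the tensor with the module, so it appears in
        # model.parameters() and is moved by model.to(device); a tensor with a
        # bare .requires_grad_() is invisible to the optimizer.
        self.kemb = nn.Parameter(
            get_kernel_embedding(self.phis, nvar, samples=kemb_size)
        )
        self.fc1 = nn.Linear(kemb_size + (nvar * points), 64)
        self.fc2 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

With this, `model.to(device)` takes care of placing `kemb` on the right device, so the explicit `.to(device)` in `__init__` is no longer needed.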