如何确保在pytorch中的链路预测GNN模型中可重复性？

Question

I'M训练图形神经网络（GNN）模型，用于Pytorch中的链接预测。我已经采取了几个步骤来确保可重复性，但是即使我期望始终如一的结果，我仍会在不同的跑步中获得不同的结果。我为确保可重复性所做的工作是

设置有关库的所有种子（pytorch，numpy，andan等）。确保我的培训数据是静态的，并且在培训过程中不要随机。但是，我注意到该模型在第一个时期的第一个时期内生成相同的特征向量，但是边缘特征（链接预测层的输出）并不相同。基于此，我怀疑问题可能与链接预测层本身有关。我对GNNS和Pytorch的新鲜感相对较新，因此我感谢任何见解或建议或帮助：

当边缘特征即使其他模型参数看起来可重现，边缘功能可能不一致。

任何潜在的代码或模型相关的更改，以帮助确保每次运行的结果相同。

如何验证所有随机性源是否在链接预测层或模型的任何其他组件中控制。

def split_data_balance(edge_index, edge_label, test_size=0.2, random_state=seed_value):
    edge_label = edge_label.numpy()

    train_idx, test_idx = train_test_split(range(len(edge_label)), test_size=test_size, stratify=edge_label, random_state=random_state)

    train_edge_index = edge_index[:, train_idx]
    test_edge_index = edge_index[:, test_idx]
    train_edge_label = edge_label[train_idx]
    test_edge_label = edge_label[test_idx]

    train_edge_label = torch.tensor(train_edge_label)
    test_edge_label = torch.tensor(test_edge_label)

    return train_edge_index, test_edge_index, train_edge_label, test_edge_label
    
edg_idx_train, edg_idx_test, edg_lable_train, edg_lable_test = split_data_balance(edge_index, edge_weight)
class GCN_linkPrediction(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, out_dim, num_layers,
                dropout, return_embeds=False):

        super(GCN_linkPrediction, self).__init__()

        self.convs = torch.nn.ModuleList([GCNConv(in_channels=input_dim if i == 0 else hidden_dim, out_channels=hidden_dim) for i in range(num_layers-1)])
        self.batchNorm = torch.nn.ModuleList([torch.nn.BatchNorm1d(num_features=hidden_dim) for i in range(num_layers)])
        self.convs.append(GCNConv(in_channels=hidden_dim, out_channels=out_dim))

        self.dropout = dropout

        self.return_embeds = return_embeds

        self.linear = torch.nn.Linear(out_dim*2, 3)


    def reset_parameters(self):
        for conv in self.convs:
            conv.reset_parameters()
        for batchN in self.batchNorm:
            batchN.reset_parameters()

    def forward(self, x, adj_t): 
        for i in range(len(self.convs)-1):
            x = self.convs[i](x, adj_t) # x is the node feature tensor and adj_t is the edge_index tensor
            x = self.batchNorm[i](x)
            x = torch.nn.functional.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)

        x = self.convs[-1](x, adj_t)

        out = x

        return out


    def link_prediction(self, x, adj_t):
        scr, tgt = adj_t
        nodef_scr = x[scr]
        nodef_tgt = x[tgt]

        link_feature = torch.cat([nodef_scr, nodef_tgt], dim=-1)

        link = self.linear(link_feature)

        return link
def train(model, x, adj_t, lable, optimizer, loss_fn):

    model.train()
    optimizer.zero_grad()
    
    x = model(x, adj_t)
    link_weight = model.link_prediction(x, adj_t)
    y_pred = torch.argmax(link_weight, dim=1)
    
    loss = loss_fn(link_weight, lable.long())
    
    loss.backward()
    optimizer.step()
    
    return loss.item(), link_weight, y_pred

def test(model, x, adj_t, lable, save_model_results=False):
    model.eval()
    
    x = model(x, adj_t)
    link_weight = model.link_prediction(x, adj_t)

    y_pred = torch.argmax(link_weight, dim=1)
    true_pred = (y_pred == lable).sum().item()  
    accuracy = true_pred / len(adj_t)

    return accuracy, link_weight, y_pred
args = {'num_layers': 4,
      'hidden_dim': 150,
      'out_dim': 60, 
      'dropout': 0.5,
      'lr': 0.01,
      'epochs': 1,
  }
    
model = GCN_linkPrediction(node_features.shape[1], args['hidden_dim'],
              args['out_dim'], args['num_layers'],
              args['dropout'])
seed_value = 42  

random.seed(seed_value)
np.random.seed(seed_value)
torch.manual_seed(seed_value)

torch.backends.cudnn.deterministic = True  # Force deterministic algorithms
torch.backends.cudnn.benchmark = False  # Disable cuDNN's benchmarking feature (can cause non-determinism)
torch.use_deterministic_algorithms(True) # Set torch to use deterministic algo

model.reset_parameters()

optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
loss_fn = torch.nn.CrossEntropyLoss() # classification

best_model = None
best_test_acc = 0

train_losses = []

range_epoch = args['epochs']
for epoch in range(1,range_epoch+1):
    train_loss, raw_train, train_pre = train(model, node_features, edg_idx_train, edg_lable_train, optimizer, loss_fn)
    train_losses.append(train_loss)
    
    with torch.no_grad():
        result, raw_test, test_pre = test(model, node_features, edg_idx_test, edg_lable_test)
    
    prediction_accuracy = result
    
    print(f'Epoch: {epoch:02d}, '
          f'Loss: {train_loss:.4f}, '
          f'Test: {100 * prediction_accuracy:.2f}%, ')

如何确保在pytorch中的链路预测GNN模型中可重复性？

问题描述投票：0回答：0

最新问题

如何确保在pytorch中的链路预测GNN模型中可重复性？

问题描述 投票：0回答：0

最新问题

问题描述投票：0回答：0