Validation loss is not changing at all


I'm using PyTorch for the first time, fine-tuning a pretrained BERT model to build a sentiment analysis classifier. Here is my classifier:

class SentimentClassifier2(nn.Module):

  def __init__(self, n_classes):
    super(SentimentClassifier2, self).__init__()
    D_in, H, D_out = 768, 200, n_classes  # use the n_classes argument rather than hard-coding 3

    self.bert = BertModel.from_pretrained(PRE_TRAINED_MODEL_NAME)
    self.drop = nn.Dropout(p=0.4)

    self.classifier = nn.Sequential(
            nn.Linear(D_in, H),
            nn.ReLU(),
            nn.Linear(H, D_out)
    )

  def forward(self, input_ids, attention_mask):
    _, pooled_output = self.bert(
        input_ids=input_ids,
        attention_mask=attention_mask,
        return_dict=False
    )
    output = self.drop(pooled_output)
    logits = self.classifier(output)
    return logits
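
For reference, a minimal smoke test of this classifier might look like the sketch below; it assumes PRE_TRAINED_MODEL_NAME points to a standard BERT checkpoint (e.g. 'bert-base-uncased') and that the transformers library is installed:

from transformers import BertTokenizer
import torch

tokenizer = BertTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)
model = SentimentClassifier2(n_classes=3)

# Tokenize a single example and run a forward pass without gradients.
enc = tokenizer("This movie was surprisingly good!", truncation=True,
                max_length=32, return_tensors="pt")
with torch.no_grad():
    logits = model(input_ids=enc["input_ids"], attention_mask=enc["attention_mask"])
print(logits.shape)  # expected: torch.Size([1, 3])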

Here are my optimizer and loss function (I only ran 20 epochs because training takes a while):

EPOCHS = 20

model2 = SentimentClassifier2(len(class_names))
model2= model2.to(device)

optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=True)

total_steps = len(train_data_loader) * EPOCHS

scheduler = get_linear_schedule_with_warmup(
  optimizer,
  num_warmup_steps=0,
  num_training_steps=total_steps
)
loss_fn = nn.CrossEntropyLoss().to(device)
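
As a side note, a quick sanity check (my sketch, not part of the original post) is to confirm that the optimizer actually holds the parameters of the model being trained, i.e. model2:

# Every parameter tensor of model2 should appear in the optimizer's param groups.
trained = {id(p) for p in model2.parameters()}
optimized = {id(p) for g in optimizer.param_groups for p in g["params"]}
print(trained == optimized)  # False would mean the optimizer is updating a different model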

Training and evaluation code:

def train_epoch( model, data_loader, loss_fn,optimizer, device, scheduler,  n_examples):
  model = model.train()
  losses = []
  correct_predictions = 0
  for d in data_loader:
    input_ids = d["input_ids"].to(device)
    attention_mask = d["attention_mask"].to(device)
    targets = d["targets"].to(device)

    outputs = model(
      input_ids=input_ids,
      attention_mask=attention_mask
    )

    _, preds = torch.max(outputs, dim=1)
    loss = loss_fn(outputs, targets)

    correct_predictions += torch.sum(preds == targets)
    losses.append(loss.item())

    loss.backward()
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()

  return correct_predictions.double() / n_examples, np.mean(losses)


def eval_model(model, data_loader, loss_fn, device, n_examples):
  model = model.eval()

  losses = []
  correct_predictions = 0
  with torch.no_grad():
    for d in data_loader:
      input_ids = d["input_ids"].to(device)
      attention_mask = d["attention_mask"].to(device)
      targets = d["targets"].to(device)

      outputs = model(
        input_ids=input_ids,
        attention_mask=attention_mask
      )

      _, preds = torch.max(outputs, dim=1)
      loss = loss_fn(outputs, targets)

      correct_predictions += torch.sum(preds == targets)
      losses.append(loss.item())

  return correct_predictions.double() / n_examples, np.mean(losses)
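
The outer training loop is not shown in the post; a typical driver that would produce the per-epoch output below, assuming train_data_loader and val_data_loader exist and using hypothetical names df_train and df_val for the datasets, might look like:

for epoch in range(EPOCHS):
    print(f"epoch{epoch + 1}:" + "_" * 30)
    # len(df_train) / len(df_val) stand in for the number of train/val examples.
    train_acc, train_loss = train_epoch(model2, train_data_loader, loss_fn,
                                        optimizer, device, scheduler, len(df_train))
    print(f"Train loss {train_loss} accuracy {train_acc}")
    val_acc, val_loss = eval_model(model2, val_data_loader, loss_fn,
                                   device, len(df_val))
    print(f"Val   loss {val_loss} accuracy {val_acc}")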

My problem: the loss on the validation samples is not changing at all!

epoch1:______________________
Train loss 1.0145157482929346 accuracy 0.4185746994848311
Val   loss 1.002384223589083 accuracy 0.4151087371232354
epoch2:______________________
Train loss 1.015038197996413 accuracy 0.41871780194619346
Val   loss 1.002384223589083 accuracy 0.4151087371232354
epoch3:______________________
Train loss 1.014710763787351 accuracy 0.4188609044075558
Val   loss 1.002384223589083 accuracy 0.4151087371232354
epoch4:______________________
Train loss 1.0139196826735648 accuracy 0.41909940850982635
Val   loss 1.002384223589083 accuracy 0.4151087371232354

I don't understand what the problem is...
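
One way to narrow a symptom like this down (a debugging sketch of mine, not from the original post) is to snapshot a weight, run a single training step, and check whether anything actually changed:

# Take one batch, run one optimization step, and compare a weight before/after.
before = model2.classifier[0].weight.detach().clone()
batch = next(iter(train_data_loader))
outputs = model2(input_ids=batch["input_ids"].to(device),
                 attention_mask=batch["attention_mask"].to(device))
loss = loss_fn(outputs, batch["targets"].to(device))
loss.backward()
optimizer.step()
optimizer.zero_grad()
after = model2.classifier[0].weight.detach()
print(torch.equal(before, after))  # True means the step did not update model2's weights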

machine-learning deep-learning pytorch loss-function bert-language-model
1 Answer

Maybe you can try the following ordering:

optimizer.zero_grad()   # clear stale gradients before the backward pass
loss.backward()
nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

optimizer.step()
scheduler.step()