AutoModelForSequenceClassification loss is not decreasing

Question (votes: 0, answers: 1)
from datasets import load_dataset
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from tqdm import tqdm

def train_one_epoch(model, dataloader, optimizer):
    model.train()
    loss_list = []
    for batch in tqdm(dataloader):
        batch_data = {
            'input_ids': batch['input_ids'],
            'attention_mask': batch['attention_mask'],
            'labels': batch['labels']
        }
        loss = model(**batch_data).loss  # cross-entropy is computed internally when labels are passed
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        loss_list.append(loss.detach().item())
    avg_loss = sum(loss_list) / len(loss_list)
    print('avg loss in epoch:', avg_loss)

def evaluate(model, dataloader):
    model.eval()
    all_labels = []
    all_predictions = []
    for batch in dataloader:
        with torch.no_grad():
            batch_data = {
                'input_ids': batch['input_ids'],
                'attention_mask': batch['attention_mask']
            }
            logits = model(**batch_data).logits
            predictions = torch.argmax(logits, dim=-1)
            labels = batch['labels']
            all_labels.extend(labels)
            all_predictions.extend(predictions)
    accuracy = compute_accuracy(all_predictions, all_labels)
    print("Accuracy", accuracy)
    return accuracy

def compute_accuracy(predictions, labels):
    correct = 0
    for pred, label in zip(predictions, labels):
        if pred == label:
            correct += 1
    return correct / len(labels)

def my_collate_fn(batched_samples):
    # Tokenize a batch of raw texts and move all tensors to the GPU.
    # Calling .cuda() inside collate_fn only works because num_workers=0.
    texts = [example['text'] for example in batched_samples]
    labels = [example['label'] for example in batched_samples]
    text_encoding = tokenizer(texts, max_length=128, truncation=True, padding=True, return_tensors='pt')
    labels = torch.LongTensor(labels)
    return {
        'input_ids': text_encoding['input_ids'].cuda(),
        'attention_mask': text_encoding['attention_mask'].cuda(),
        'labels': labels.cuda()
    }

torch.manual_seed(64)
batch_size = 8
learning_rate = 5e-5
num_epochs = 10
model_name = "roberta-base"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

model = model.cuda()

optimizer = torch.optim.AdamW(params=model.parameters(), lr=learning_rate, eps=1e-8)

datasets = load_dataset("gpt3mix/sst2")

train_dataloader = DataLoader(
    datasets['train'],
    batch_size=batch_size,
    shuffle=True,
    collate_fn=my_collate_fn,
    num_workers=0
)

validation_dataloader = DataLoader(
    datasets['validation'],
    batch_size=batch_size,
    shuffle=False,
    collate_fn=my_collate_fn,
    num_workers=0
)

best_acc = 0.0
for epoch in range(1, num_epochs + 1):
    train_one_epoch(model, train_dataloader, optimizer)
    valid_acc = evaluate(model, validation_dataloader)
    best_acc = max(best_acc, valid_acc)  # track the best validation accuracy

100%|██████████| 865/865 [01:27<00:00, 9.89it/s]
avg loss in epoch: 0.6746856869559068
Accuracy 0.4908256880733945
100%|██████████| 865/865 [01:25<00:00, 10.09it/s]
avg loss in epoch: 0.6922555248516833
Accuracy 0.4908256880733945
100%|██████████| 865/865 [01:27<00:00, 9.89it/s]
avg loss in epoch: 0.6976809655310791
Accuracy 0.5091743119266054

Changing the learning rate doesn't help either.

python machine-learning nlp huggingface
1 Answer
0 votes

You didn't specify the number of labels for the SequenceClassification model, which often results in the model predicting the same class for every data point.
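
As a quick sanity check (a hypothetical diagnostic, not from the original post), you can tally the predicted classes inside evaluate(); a collapsed model concentrates almost all of its predictions on a single class:

from collections import Counter

# Tally the classes in evaluate()'s all_predictions list; a collapsed model
# puts nearly all 872 validation predictions on one class.
prediction_counts = Counter(pred.item() for pred in all_predictions)
print(prediction_counts)  # e.g. Counter({0: 872}) if every example gets class 0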

Just change the line that loads the model to:

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
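
To verify the fix took effect, you can inspect the loaded model (a minimal sketch, assuming roberta-base): the config should report two labels and the classification head should end in a two-unit linear layer:

from transformers import AutoModelForSequenceClassification

# With num_labels=2, the classification head is sized for two classes.
model = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=2)
print(model.config.num_labels)    # 2
print(model.classifier.out_proj)  # Linear(in_features=768, out_features=2, bias=True)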