I created an NER model with BERT to detect medical entities, and it works very well. I am now trying to add a CRF layer on top of the BERT model to improve its performance, but I am running into an error that I can't seem to resolve.
Here is the error:
ValueError Traceback (most recent call last)
<ipython-input-32-99c3c401704b> in <cell line: 85>()
83
84 # Start training
---> 85 trainer.train()
7 frames
/usr/local/lib/python3.10/dist-packages/torchcrf/__init__.py in _validate(self, emissions, tags, mask)
165 no_empty_seq_bf = self.batch_first and mask[:, 0].all()
166 if not no_empty_seq and not no_empty_seq_bf:
--> 167 raise ValueError('mask of the first timestep must all be on')
168
169 def _compute_score(
ValueError: mask of the first timestep must all be on
And here is my code:
import torch
import torch.nn as nn
from torchcrf import CRF
from transformers import TrainingArguments, Trainer
from transformers import DataCollatorForTokenClassification
from transformers import BertTokenizerFast
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)
    labels = []
    for i, label in enumerate(examples["ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:
            # Special tokens have a word id that is None. We set the label to -100
            # so they are automatically ignored in the loss function.
            if word_idx is None:
                label_ids.append(-100)
            # We set the label for the first token of each word.
            elif word_idx != previous_word_idx:
                label_ids.append(label[word_idx])
            # For the other tokens in a word, we set the label to either the current
            # label or -100, depending on the label_all_tokens flag.
            else:
                label_ids.append(label[word_idx] if label_all_tokens else -100)
            previous_word_idx = word_idx
        labels.append(label_ids)
    tokenized_inputs["labels"] = labels
    return tokenized_inputs
label_all_tokens = False
tokenizer = BertTokenizerFast.from_pretrained('bert-base-cased')
tokenized_data = my_dataset_dict.map(tokenize_and_align_labels, batched=True)
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
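For reference, the aligned labels already contain -100 at the special-token positions and, since label_all_tokens is False, at non-first subword positions; a quick check on a toy batch (hypothetical sentence, any real one works) makes that visible:

# Toy sanity check: where does -100 end up in the aligned labels?
sample = tokenize_and_align_labels({"tokens": [["Aspirin", "helps"]],
                                    "ner_tags": [[1, 0]]})
print(sample["labels"])  # e.g. [[-100, 1, 0, -100]]: [CLS]/[SEP] get -100,
                         # and so would any non-first subword pieces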
class BERT_CRF_Model(nn.Module):
    def __init__(self, bert_model, num_labels):
        super(BERT_CRF_Model, self).__init__()
        self.bert = bert_model
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)
        self.crf = CRF(num_labels, batch_first=True)

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = self.dropout(outputs[0])  # Last hidden state
        emissions = self.classifier(sequence_output)
        if labels is not None:
            # CRF loss
            loss = -self.crf(emissions, labels, mask=attention_mask.bool(), reduction='mean')
            return loss
        else:
            # CRF decoding (prediction)
            prediction = self.crf.decode(emissions, mask=attention_mask.bool())
            return emissions  # Make sure to return emissions here
class CustomTrainer(Trainer):
    def __init__(self, *args, crf_layer=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.crf_layer = crf_layer

    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")  # Extract the labels
        emissions = model(**inputs)  # Get the emissions from the model
        emissions = torch.stack(emissions) if isinstance(emissions, list) else emissions
        # Check the attention mask
        mask = inputs["attention_mask"].bool()
        if mask.size(0) == 0 or mask[:, 0].sum() == 0:
            raise ValueError("The mask of the first timestep must be on")
        # Compute the CRF loss
        loss = -self.crf_layer(emissions, labels, mask=mask)
        return (loss, inputs) if return_outputs else loss
# Load BERT model
from transformers import BertModel
bert_model = BertModel.from_pretrained("bert-base-cased")

model = BERT_CRF_Model(bert_model, num_labels=len(unique_labels))
crf_layer = CRF(num_tags=len(unique_labels))

training_args = TrainingArguments(
    output_dir="my_awesome_ner_model",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=12,
    per_device_eval_batch_size=12,
    num_train_epochs=1,
    weight_decay=0.01,
    push_to_hub=True,
)

trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_data["train"],
    eval_dataset=tokenized_data["val"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    crf_layer=crf_layer,  # Pass the CRF layer
)

trainer.train()
I'm not sure why this error occurs; any help would be greatly appreciated!
pytorch-crf expects the mask of the first timestep to be all on for every sequence, it does not accept -100 as a tag id (only ids in [0, num_labels) are valid), and it expects torch tensors with the mask switched on wherever the labels are real. In your pipeline, those -100s come from tokenize_and_align_labels (special tokens and non-first subwords) and from DataCollatorForTokenClassification, which pads the labels with -100 by default. So do the following:
# (here labels and logits are numpy arrays, e.g. inside a prediction step)
mask = labels != self.pad_token_id   # mask out padded positions
mask[:, 0] = True                    # pytorch-crf needs the first timestep on
emissions_torch = torch.from_numpy(logits).float().to(self.device)
mask_torch = torch.from_numpy(mask).bool().to(self.device)
predictions = self.model.crf.decode(emissions=emissions_torch, mask=mask_torch)
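Applied to your CustomTrainer, a minimal sketch of a compute_loss following this recipe could look like the one below. It is one way to wire it up, not the canonical API: it assumes the model is not wrapped (so model.crf is reachable), reuses the CRF inside BERT_CRF_Model (which you built with batch_first=True) instead of the standalone crf_layer, and uses 0 as an arbitrary filler tag. Note that your standalone crf_layer defaults to batch_first=False, which on its own reproduces exactly this ValueError when given batch-first tensors, and its parameters are not in model.parameters(), so the Trainer never trains it.

class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels")
        emissions = model(**inputs)  # BERT_CRF_Model.forward returns emissions when labels is None

        # -100 marks special tokens, non-first subwords, and padding; mask them out.
        mask = labels != -100
        # pytorch-crf requires every sequence's first timestep to be on,
        # so force position 0 (the [CLS] token) back on.
        mask[:, 0] = True

        # The CRF only accepts tag ids in [0, num_labels), so replace every -100
        # with a valid filler (0 here). All filler positions except position 0
        # are masked, so only the forced-on [CLS] slot is scored with the filler.
        safe_labels = labels.clone()
        safe_labels[labels == -100] = 0

        # Use the CRF inside the model so its transition parameters are trained.
        loss = -model.crf(emissions, safe_labels, mask=mask, reduction='mean')
        return (loss, {"emissions": emissions}) if return_outputs else loss

With this version the crf_layer= constructor argument and the manual mask check become unnecessary.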
Good luck.