我使用 pytorch 训练了 BertClassifier 模型。创建 best.pt 之后,我想在生产中制作我的模型,并使用它从样本开始进行预测和分类,因此我从检查点恢复它们。否则,将其放入评估和冻结模型后,我使用 .predict 来处理我的示例,但我遇到了此属性错误。在调用检查点之前我还初始化了它。当我错了?谢谢您的帮助!
def save_ckp(state, is_best, checkpoint_path, best_model_path):
"""
function created to save checkpoint, the latest one and the best one.
This creates flexibility: either you are interested in the state of the latest checkpoint or the best checkpoint.
state: checkpoint we want to save
is_best: is this the best checkpoint; min validation loss
checkpoint_path: path to save checkpoint
best_model_path: path to save best model
"""
f_path = checkpoint_path
# save checkpoint data to the path given, checkpoint_path
torch.save(state, f_path)
# if it is a best model, min validation loss
if is_best:
best_fpath = best_model_path
# copy that checkpoint file to best path given, best_model_path
shutil.copyfile(f_path, best_fpath)
def load_ckp(checkpoint_fpath, model, optimizer):
"""
checkpoint_path: path to save checkpoint
model: model that we want to load checkpoint parameters into
optimizer: optimizer we defined in previous training
"""
# load check point
checkpoint = torch.load(checkpoint_fpath)
# initialize state_dict from checkpoint to model
model.load_state_dict(checkpoint['state_dict'])
# initialize optimizer from checkpoint to optimizer
optimizer.load_state_dict(checkpoint['optimizer'])
# initialize valid_loss_min from checkpoint to valid_loss_min
valid_loss_min = checkpoint['valid_loss_min']
# return model, optimizer, epoch value, min validation loss
return model, optimizer, checkpoint['epoch'], valid_loss_min.item()
#Create the BertClassfier class
class BertClassifier(nn.Module):
"""Bert Model for Classification Tasks."""
def __init__(self, freeze_bert=True):
"""
@param bert: a BertModel object
@param classifier: a torch.nn.Module classifier
@param freeze_bert (bool): Set `False` to fine-tune the BERT model
"""
super(BertClassifier, self).__init__()
.......
def forward(self, input_ids, attention_mask):
''' Feed input to BERT and the classifier to compute logits.
@param input_ids (torch.Tensor): an input tensor with shape (batch_size,
max_length)
@param attention_mask (torch.Tensor): a tensor that hold attention mask
information with shape (batch_size, max_length)
@return logits (torch.Tensor): an output tensor with shape (batch_size,
num_labels) '''
# Feed input to BERT
outputs = self.bert(input_ids=input_ids,
attention_mask=attention_mask)
# Extract the last hidden state of the token `[CLS]` for classification task
last_hidden_state_cls = outputs[0][:, 0, :]
# Feed input to classifier to compute logits
logits = self.classifier(last_hidden_state_cls)
return logits
def initialize_model(epochs):
""" Initialize the Bert Classifier, the optimizer and the learning rate scheduler."""
# Instantiate Bert Classifier
bert_classifier = BertClassifier(freeze_bert=False)
# Tell PyTorch to run the model on GPU
bert_classifier = bert_classifier.to(device)
# Create the optimizer
optimizer = AdamW(bert_classifier.parameters(),
lr=lr, # Default learning rate
eps=1e-8 # Default epsilon value
)
# Total number of training steps
total_steps = len(train_dataloader) * epochs
# Set up the learning rate scheduler
scheduler = get_linear_schedule_with_warmup(optimizer,
num_warmup_steps=0, # Default value
num_training_steps=total_steps)
return bert_classifier, optimizer, scheduler
def train(model, train_dataloader, val_dataloader, valid_loss_min_input, checkpoint_path, best_model_path, start_epochs, epochs, evaluation=True):
"""Train the BertClassifier model."""
# Start training loop
logging.info("--Start training...\n")
# Initialize tracker for minimum validation loss
valid_loss_min = valid_loss_min_input
for epoch_i in range(start_epochs, epochs):
# =======================================
# Training
# =======================================
# Print the header of the result table
logging.info((f"{'Epoch':^7} | {'Batch':^7} | {'Train Loss':^12} | {'Val Loss':^10} | {'Val Acc':^9} | {'Elapsed':^9}"))
# Measure the elapsed time of each epoch
t0_epoch, t0_batch = time.time(), time.time()
# Reset tracking variables at the beginning of each epoch
total_loss, batch_loss, batch_counts = 0, 0, 0
# Put the model into the training mode
model.train()
# For each batch of training data...
for step, batch in enumerate(train_dataloader):
batch_counts +=1
# Load batch to GPU
b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)
# Zero out any previously calculated gradients
model.zero_grad()
# Perform a forward pass. This will return logits.
logits = model(b_input_ids, b_attn_mask)
# Compute loss and accumulate the loss values
loss = loss_fn(logits, b_labels)
batch_loss += loss.item()
total_loss += loss.item()
# Perform a backward pass to calculate gradients
loss.backward()
# Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
# Update parameters and the learning rate
optimizer.step()
scheduler.step()
# Print the loss values and time elapsed for every 20 batches
if (step % 500 == 0 and step != 0) or (step == len(train_dataloader) - 1):
# Calculate time elapsed for 20 batches
time_elapsed = time.time() - t0_batch
# Print training results
logging.info(f"{epoch_i + 1:^7} | {step:^7} | {batch_loss / batch_counts:^12.6f} | {'-':^10} | {'-':^9} | {time_elapsed:^9.2f}")
# Reset batch tracking variables
batch_loss, batch_counts = 0, 0
t0_batch = time.time()
# Calculate the average loss over the entire training data
avg_train_loss = total_loss / len(train_dataloader)
logging.info("-"*70)
# =======================================
# Evaluation
# =======================================
if evaluation == True:
# After the completion of each training epoch, measure the model's performance
# on our validation set.
val_loss, val_accuracy = evaluate(model, val_dataloader)
# Print performance over the entire training data
time_elapsed = time.time() - t0_epoch
logging.info(f"{epoch_i + 1:^7} | {'-':^7} | {avg_train_loss:^12.6f} | {val_loss:^10.6f} | {val_accuracy:^10.6f} | {time_elapsed:^9.2f}")
logging.info("-"*70)
logging.info("\n")
# create checkpoint variable and add important data
checkpoint = {
'epoch': epoch_i + 1,
'valid_loss_min': val_loss,
'state_dict': model.state_dict(),
'optimizer': optimizer.state_dict(),
}
# save checkpoint
save_ckp(checkpoint, False, checkpoint_path, best_model_path)
## TODO: save the model if validation loss has decreased
if val_loss <= valid_loss_min:
print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(valid_loss_min,val_loss))
# save checkpoint as best model
save_ckp(checkpoint, True, checkpoint_path, best_model_path)
valid_loss_min = val_loss
logging.info("-----------------Training complete--------------------------")
def evaluate(model, val_dataloader):
"""After the completion of each training epoch, measure the model's performance on our validation set."""
# Put the model into the evaluation mode. The dropout layers are disabled during the test time.
model.eval()
# Tracking variables
val_accuracy = []
val_loss = []
# For each batch in our validation set...
for batch in val_dataloader:
# Load batch to GPU
b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)
# Compute logits
with torch.no_grad():
logits = model(b_input_ids, b_attn_mask)
# Compute loss
loss = loss_fn(logits, b_labels)
val_loss.append(loss.item())
# Get the predictions
preds = torch.argmax(logits, dim=1).flatten()
# Calculate the accuracy rate
accuracy = (preds == b_labels).cpu().numpy().mean() * 100
val_accuracy.append(accuracy)
# Compute the average accuracy and loss over the validation set.
val_loss = np.mean(val_loss)
val_accuracy = np.mean(val_accuracy)
return val_loss, val_accuracy
bert_classifier, optimizer, scheduler = initialize_model(epochs=n_epochs)
train(model = bert_classifier ......)
bert_classifier, optimizer, scheduler = initialize_model(epochs=n_epochs)
model, optimizer, start_epoch, valid_loss_min = load_ckp(r"./best_model/best_model.pt", bert_classifier, optimizer)
model.eval()
model.freeze()
sample = {
"seq": "ABCDE",}
predictions = model.predict(sample)
AttributeError: 'BertClassifier' object has no attribute 'predict'
一般来说,人们会为你编写预测函数。 如果没有,您需要处理低级的事情。 在这行之后,您加载了经过训练的参数。
model, optimizer, start_epoch, valid_loss_min = load_ckp(r"./best_model/best_model.pt", bert_classifier, optimizer)
之后,你需要做
model.forward(intput_seq,this_attention_mask_maybe_null)
。
您可以看到模型中的前向方法是:def forward(self, input_ids, attention_mask)
。