PyTorch model predictions inconsistent: single image vs. batch processing

Problem description · votes: 0 · answers: 1

I have noticed significant differences in model predictions when running inference on a single image versus the whole dataset. The model is trained with PyTorch, and it gives markedly different predictions for the same image depending on whether it is processed individually or as part of a batch. Is there a way to make the predictions for the same image consistent between individual and batch processing?

from transformers import Trainer, TrainingArguments, PreTrainedModel, PretrainedConfig
from torch.utils.data import Dataset
import torch
import torch.nn.functional as F
import numpy as np

# Number of Features
num_of_features = 128

# Dataset Class
class SequenceDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return {"input_ids": self.X[idx], "labels": self.y[idx]}


# Configuration Class
class SequenceConfig(PretrainedConfig):
    model_type = "sequence_transformer"

    def __init__(self, num_features=num_of_features, num_classes=3, d_model=1024, nhead=4, num_layers=4, dim_feedforward=512, **kwargs):
        self.num_features = num_features
        self.num_classes = num_classes
        self.d_model = d_model
        self.nhead = nhead
        self.num_layers = num_layers
        self.dim_feedforward = dim_feedforward
        super().__init__(**kwargs)


# Transformer Model
class SequenceTransformer(PreTrainedModel):
    config_class = SequenceConfig

    def __init__(self, config):
        super().__init__(config)
        self.embedding = torch.nn.Linear(config.num_features, config.d_model)
        self.positional_encoding = torch.nn.Parameter(torch.zeros(1, config.d_model))
        encoder_layer = torch.nn.TransformerEncoderLayer(
            d_model=config.d_model, 
            nhead=config.nhead, 
            dim_feedforward=config.dim_feedforward, 
            batch_first=True
        )
        self.transformer_encoder = torch.nn.TransformerEncoder(encoder_layer, num_layers=config.num_layers)
        self.fc = torch.nn.Linear(config.d_model, config.num_classes)

    def forward(self, input_ids, labels=None):
        src = self.embedding(input_ids) + self.positional_encoding
        output = self.transformer_encoder(src)
        logits = self.fc(output)
        probs = F.softmax(logits, dim=-1)

        loss = None
        if labels is not None:
            loss_fct = torch.nn.CrossEntropyLoss()
            loss = loss_fct(logits, labels)
            
        return {"loss": loss, "logits": logits, "probs": probs} if labels is not None else logits


# Training Code
config = SequenceConfig()
model = SequenceTransformer(config)

# Training Arguments
# Assume `path`, `train_image`, `numOfBreakpointsPerEpoch`, `train_dataset`,
# `val_dataset`, and `compute_metrics` are defined elsewhere
batchSize = 32
numWarmUpSteps = int(np.shape(train_image)[0] / batchSize / numOfBreakpointsPerEpoch / 10)
training_args = TrainingArguments(
    output_dir=path,
    num_train_epochs=1,
    per_device_train_batch_size=batchSize,
    per_device_eval_batch_size=320,
    warmup_steps=numWarmUpSteps,
    weight_decay=0.1,
    logging_strategy='no',
    eval_strategy="epoch",
    save_strategy="epoch",
    metric_for_best_model="accuracy",
    save_only_model=True,
)

# Trainer Initialization
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)

# Train the Model
train_output = trainer.train()

# Save Model and Training Arguments
trainer.save_model("./SavedModels")
torch.save(training_args, "./SavedModels/training_args.bin")

# Prediction Code
training_args_loaded = torch.load("./SavedModels/training_args.bin")
model_save_path = "./SavedModels/"
model = SequenceTransformer(config).from_pretrained(model_save_path)

trainer = Trainer(model=model, compute_metrics=compute_metrics, args=training_args_loaded)
test_data = np.random.rand(10, num_of_features)  # Example test data
test_predictions = trainer.predict(torch.tensor(test_data, dtype=torch.float32))

# Output Test Predictions
print(test_predictions)

For a single image the output is [0.37732467 0.2642143 0.35846105], while for the same image processed in a batch it is [0.3185594 0.40971586 0.2717247 ].
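
One quick way to narrow this down is to run the exact same tensor through the model twice in each mode: if two identical calls disagree, a stochastic layer such as dropout is still active. A minimal check, reusing the `model` and `num_of_features` defined above:

import torch

x = torch.rand(1, num_of_features)  # random stand-in input

model.train()          # training mode: stochastic layers are active
with torch.no_grad():
    print(model(x))    # two identical calls can disagree here
    print(model(x))

model.eval()           # evaluation mode: stochastic layers are disabled
with torch.no_grad():
    print(model(x))    # repeated calls now match exactly
    print(model(x))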

Tags: python · pytorch · huggingface-transformers · huggingface · huggingface-trainer
1 Answer · 0 votes

Try using evaluation mode. I have added it and updated the code below.

from transformers import Trainer, TrainingArguments, PreTrainedModel, PretrainedConfig
from torch.utils.data import Dataset
import torch
import torch.nn.functional as F
import numpy as np

# Number of Features
num_of_features = 128

# Dataset Class
class SequenceDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return {"input_ids": self.X[idx], "labels": self.y[idx]}

# Configuration Class
class SequenceConfig(PretrainedConfig):
    model_type = "sequence_transformer"

    def __init__(self, num_features=num_of_features, num_classes=3, d_model=1024, nhead=4, 
                 num_layers=4, dim_feedforward=512, **kwargs):
        self.num_features = num_features
        self.num_classes = num_classes
        self.d_model = d_model
        self.nhead = nhead
        self.num_layers = num_layers
        self.dim_feedforward = dim_feedforward
        super().__init__(**kwargs)

# Transformer Model
class SequenceTransformer(PreTrainedModel):
    config_class = SequenceConfig

    def __init__(self, config):
        super().__init__(config)
        self.embedding = torch.nn.Linear(config.num_features, config.d_model)
        self.positional_encoding = torch.nn.Parameter(torch.zeros(1, config.d_model))
        
        encoder_layer = torch.nn.TransformerEncoderLayer(
            d_model=config.d_model, nhead=config.nhead, dim_feedforward=config.dim_feedforward, batch_first=True
        )
        self.transformer_encoder = torch.nn.TransformerEncoder(encoder_layer, num_layers=config.num_layers)
        self.fc = torch.nn.Linear(config.d_model, config.num_classes)

    def forward(self, input_ids, labels=None):
        src = self.embedding(input_ids) + self.positional_encoding
        output = self.transformer_encoder(src)
        logits = self.fc(output)
        probs = F.softmax(logits, dim=-1)
        loss = None
        if labels is not None:
            loss_fct = torch.nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.config.num_classes), labels.view(-1))
        return {"loss": loss, "logits": logits, "probs": probs} if labels is not None else logits

# Assume `train_dataset`, `val_dataset`, `compute_metrics`, `path`, `train_image`, `numOfBreakpointsPerEpoch` are defined
# Training Code
config = SequenceConfig()
model = SequenceTransformer(config)

batchSize = 32
numWarmUpSteps = int(np.shape(train_image)[0] / batchSize / numOfBreakpointsPerEpoch / 10)
training_args = TrainingArguments(
    output_dir=path,
    num_train_epochs=1,
    per_device_train_batch_size=batchSize,
    per_device_eval_batch_size=320,
    warmup_steps=numWarmUpSteps,
    weight_decay=0.1,
    logging_strategy='no',
    eval_strategy="epoch",
    save_strategy="epoch",
    metric_for_best_model="accuracy",
    save_only_model=True,
)

# Trainer Initialization
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)

# Train the Model
train_output = trainer.train()

# Save Model and Training Arguments
trainer.save_model("./SavedModels")
torch.save(training_args, "./SavedModels/training_args.bin")

# Prediction Code
training_args_loaded = torch.load("./SavedModels/training_args.bin", weights_only=False)  # TrainingArguments is a pickled Python object, so weights_only must be False on recent PyTorch
model_save_path = "./SavedModels/"
model = SequenceTransformer.from_pretrained(model_save_path)  # from_pretrained is a classmethod; no need to instantiate first
model.eval()  # Ensure the model is in evaluation mode

trainer = Trainer(model=model, compute_metrics=compute_metrics, args=training_args_loaded)

test_data = np.random.rand(10, num_of_features)  # Example test data
test_data_torch = torch.tensor(test_data, dtype=torch.float32)

# Trainer.predict expects a Dataset, not a raw tensor, so wrap the inputs
class InferenceDataset(Dataset):
    def __init__(self, X):
        self.X = X

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return {"input_ids": self.X[idx]}

single_image = InferenceDataset(test_data_torch[0:1])
batch_images = InferenceDataset(test_data_torch)

# Trainer.predict already runs the model in eval mode with gradients disabled
single_prediction = trainer.predict(single_image)
batch_predictions = trainer.predict(batch_images)

# Output Predictions
print("Single Prediction:", single_prediction.predictions[0])
print("Batch Prediction:", batch_predictions.predictions[0])

Why eval() matters: in evaluation mode, dropout layers (which torch.nn.TransformerEncoderLayer contains) are disabled, so the output no longer changes randomly from call to call. Likewise, batch-normalization layers (when a model has them) apply the running estimates of mean and variance accumulated during training, ensuring each input is normalized uniformly, independent of the particular batch it belongs to.
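
As a minimal standalone sketch of this behaviour: torch.nn.Dropout (which TransformerEncoderLayer uses internally, with dropout=0.1 by default) randomly zeroes elements in training mode but is the identity in evaluation mode.

import torch

drop = torch.nn.Dropout(p=0.1)
x = torch.ones(8)

drop.train()       # training mode: elements are randomly zeroed, the rest rescaled by 1/(1-p)
print(drop(x))     # output differs between calls
print(drop(x))

drop.eval()        # evaluation mode: dropout is the identity function
print(drop(x))     # always tensor([1., 1., 1., 1., 1., 1., 1., 1.])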
