作为练习,我想创建一个神经网络,根据直角三角形的两条直角边来预测其斜边。在本练习中,我使用毕达哥拉斯定理(勾股定理)生成 10,000 组用于训练模型的数据。问题是,尽管平均损失只有 0.18,准确率却是 0%。我不确定自己哪里做错了。
class SimpleMLP(nn.Module):
    """Small fully connected network: 2 inputs -> 64 -> 64 -> ``num_of_classes``.

    The two hidden linear layers are each followed by a ReLU; the final
    linear layer has no activation.

    Args:
        num_of_classes: Width of the final linear layer (defaults to 10).
    """

    def __init__(self, num_of_classes=10):
        super().__init__()
        hidden_width = 64
        # Modules are instantiated in the order they are applied.
        stack = [
            nn.Linear(2, hidden_width),
            nn.ReLU(),
            nn.Linear(hidden_width, hidden_width),
            nn.ReLU(),
            # Final projection to the requested output width.
            nn.Linear(hidden_width, num_of_classes),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the raw outputs."""
        return self.layers(x)
class PythagoreanDataset(Dataset):
    """Synthetic dataset of right triangles.

    Each sample is ``(data, label)`` where ``data`` is a float tensor holding
    the two legs ``[a, b]`` and ``label`` is a one-element float tensor holding
    the hypotenuse ``c``. The triples are generated once, at construction.

    Args:
        transform: Optional callable applied to the ``[a, b]`` input tensor
            each time a sample is fetched. ``None`` (the default) returns the
            raw values.
    """

    def __init__(self, transform=None):
        # Keep the transform so __getitem__ can apply it; previously the
        # argument was accepted but silently ignored.
        self.transform = transform
        self.values = self._get_pythagorean_values()

    def __getitem__(self, index):
        """Return the ``(data, label)`` tensors for the sample at ``index``."""
        a, b, c = self.values[index]
        label = torch.as_tensor([c], dtype=torch.float)
        data = torch.as_tensor([a, b], dtype=torch.float)
        if self.transform is not None:
            data = self.transform(data)
        return data, label

    def __len__(self):
        """Return the number of generated triples."""
        return len(self.values)

    def _get_pythagorean_values(self, array_size: int = 10000) -> list:
        """Generate ``array_size`` random ``(a, b, c)`` triples.

        ``a`` and ``b`` are integers drawn from 1..500 (inclusive) and stored
        as floats; ``c`` is the corresponding hypotenuse.
        """
        values = []
        for _ in range(array_size):
            a = float(randint(1, 500))
            b = float(randint(1, 500))
            values.append((a, b, math.hypot(a, b)))
        return values
def _correct(output, target):
predicted_digits = output.argmax(1) # pick digit with largest network output
correct_ones = (predicted_digits == target).type(
torch.float
) # 1.0 for correct, 0.0 for incorrect
return correct_ones.sum().item()
def train(
    data_loader: DataLoader,
    model: torch.nn.Module,
    criterion: torch.nn.Module,
    optimizer: torch.optim.Optimizer,
    device: torch.device,
):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        data_loader: Yields ``(data, target)`` batches.
        model: Network to optimise; switched to training mode here.
        criterion: Loss function called as ``criterion(output, target)``.
        optimizer: Optimiser that updates ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The average of the per-batch losses as a Python float.
    """
    model.train()
    num_batches = len(data_loader)
    # Count the samples the loader actually yields. When the loader uses a
    # sampler over a subset, len(data_loader.dataset) over-counts and would
    # deflate the reported accuracy.
    num_items = 0
    total_loss = 0.0
    total_correct = 0
    for data, target in data_loader:
        # Copy data and targets to device
        data = data.to(device)
        target = target.to(device)

        # Clear gradients left over from the previous step (or from before
        # this call) so each update uses only the current batch.
        optimizer.zero_grad()

        # Do a forward pass
        output = model(data)

        # Calculate the loss
        loss = criterion(output, target)
        # .item() extracts a plain float; adding the tensor itself would keep
        # every batch's autograd graph alive until the epoch ends.
        total_loss += loss.item()

        # Count the predictions that _correct considers correct
        total_correct += _correct(output, target)
        num_items += len(target)

        # Backpropagation
        loss.backward()
        optimizer.step()

    train_loss = total_loss / num_batches
    accuracy = total_correct / num_items
    print(f"Train accuracy: {accuracy:.2%}, Average loss: {train_loss:7f}")
    return train_loss
def test(
    test_loader: DataLoader,
    model: torch.nn.Module,
    criterion: torch.nn.Module,
    device: torch.device,
):
    """Evaluate ``model`` on ``test_loader`` and return the mean per-batch loss.

    Args:
        test_loader: Yields ``(data, target)`` batches.
        model: Network to evaluate; switched to evaluation mode here.
        criterion: Loss function called as ``criterion(output, target)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The average of the per-batch losses as a Python float.
    """
    model.eval()
    num_batches = len(test_loader)
    # Count the samples the loader actually yields. When the loader uses a
    # sampler over a subset, len(test_loader.dataset) over-counts and would
    # cap the reported accuracy at the subset's share of the dataset.
    num_items = 0
    test_loss = 0.0
    total_correct = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for data, target in test_loader:
            # Copy data and targets to device
            data = data.to(device)
            target = target.to(device)

            # Do a forward pass
            output = model(data)

            # Calculate the loss
            loss = criterion(output, target)
            test_loss += loss.item()

            # Count the predictions that _correct considers correct
            total_correct += _correct(output, target)
            num_items += len(target)

    test_loss = test_loss / num_batches
    accuracy = total_correct / num_items
    print(f"Test accuracy: {100*accuracy:>0.1f}%, average loss: {test_loss:>7f}")
    return test_loss
def main():
    """Train ``SimpleMLP`` on ``PythagoreanDataset`` and print a sample prediction.

    Splits the dataset 80/20 into training and validation subsets, trains for
    a fixed number of epochs while recording the per-epoch losses, plots the
    loss curves, and finally predicts the hypotenuse for legs 3 and 4.
    """
    # A torch.device (not a bare string) matches the annotations of
    # train() and test().
    device = torch.device("cpu")
    dataset = PythagoreanDataset()

    # Creating data indices for training and validation splits:
    validation_split = 0.2
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    split = int(np.floor(validation_split * dataset_size))
    # The first `split` indices are held out for validation.
    train_indices, val_indices = indices[split:], indices[:split]

    # Creating PT data samplers and loaders:
    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)
    train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=train_sampler)
    test_loader = DataLoader(dataset, batch_size=BATCH_SIZE, sampler=valid_sampler)

    # One output unit: the model regresses a single value.
    model = SimpleMLP(num_of_classes=1).to(device)
    print(model)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters())

    epochs = 500
    losses = []
    for epoch in tqdm(range(epochs)):
        print(f"Training epoch: {epoch+1}")
        train_loss = train(train_loader, model, criterion, optimizer, device=device)
        test_loss = test(test_loader, model, criterion, device=device)
        losses.append((train_loss, test_loss))
    plot_loss_curves(losses=losses)

    # Example prediction. Create the input on the model's device and disable
    # gradient tracking, since this is inference only.
    test_input = torch.tensor([[3, 4]], dtype=torch.float32, device=device)
    model.eval()
    with torch.no_grad():
        predicted_output = model(test_input)
    print("Predicted hypotenuse:", predicted_output.item())
---
您的代码中有不少不合理的地方,但我认为导致准确率问题的原因是 `_correct` 函数。
您创建您的模型
model = SimpleMLP(num_of_classes=1)
您的模型设计时接受大小为
(bs, 2)
的输入并产生大小为 (bs, 1)
的输出
现在来看您的 `_correct` 函数:
def _correct(output, target):
predicted_digits = output.argmax(1) # pick digit with largest network output
correct_ones = (predicted_digits == target).type(
torch.float
) # 1.0 for correct, 0.0 for incorrect
return correct_ones.sum().item()
predicted_digits = output.argmax(1)
这一行毫无意义:您是在长度为 1 的轴上取 argmax,因此对每个样本都会返回 0。例如:
output = torch.randn(8, 1)
output.argmax(1)
>tensor([0, 0, 0, 0, 0, 0, 0, 0])
对输出向量取 argmax 是处理分类问题时的做法,但您这里做的是回归,并不适用。
因此,接下来的比较 `correct_ones = (predicted_digits == target)` 同样没有意义。您的模型是输出浮点数的回归模型,几乎不可能输出与目标完全相等的值(例如 4.000000001 != 4)。
基于此,我预计 `_correct` 函数对所有预测都会输出 0.0。