I'm working through https://github.com/parrt/fundamentals-of-deep-learning/blob/main/notebooks/3.train-test-diabetes.ipynb as an exercise, and at first I forgot to reshape the y tensors in these lines:
y_train = torch.tensor(y_train).float().reshape(-1,1) # column vector
y_test = torch.tensor(y_test).float().reshape(-1,1)
My model stopped learning very early; the training loss did not improve. Does anyone understand what effect these reshape() calls have, and how I can avoid this mistake in the future?

The full code, with comments, follows:
import numpy as np
import pandas as pd
import torch
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

def train1(model, X_train, X_test, y_train, y_test,
           learning_rate=.5, nepochs=2000):
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    history = []  # track training and validation loss
    for epoch in range(nepochs+1):
        y_pred = model(X_train)
        loss = torch.mean((y_pred - y_train)**2)
        y_pred_test = model(X_test)
        loss_test = torch.mean((y_pred_test - y_test)**2)
        history.append((loss.item(), loss_test.item()))  # .item() detaches from the graph
        if epoch % (nepochs//10) == 0:
            print(f"Epoch {epoch:4d} MSE train loss {loss:12.3f} test loss {loss_test:12.3f}")
        optimizer.zero_grad()
        loss.backward()  # autograd computes w1.grad, b1.grad, ...
        optimizer.step()
    return torch.tensor(history)
d = load_diabetes()
df = pd.DataFrame(d.data, columns=d.feature_names)
df['disease'] = d.target  # "quantitative measure of disease progression one year after baseline"
print(df.head(3))

np.random.seed(1)  # set a random seed for consistency across runs
n = len(df)
X = df.drop('disease', axis=1).values
y = df['disease'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

# standardize features using training-set statistics
m = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)
X_train = (X_train - m) / std
X_test = (X_test - m) / std
X_train = torch.tensor(X_train).float()
X_test = torch.tensor(X_test).float()

# define the model once we know the number of input columns
ncols = X_train.shape[1]
n_neurons = 150
model2 = torch.nn.Sequential(
    torch.nn.Linear(ncols, n_neurons),
    torch.nn.ReLU(),
    torch.nn.Linear(n_neurons, 1)
)
# HERE !!!!!!
# without reshape: train loss doesn't improve beyond epoch 800, loss=6074
# y_train = torch.tensor(y_train).float()
# y_test = torch.tensor(y_test).float()
# print(y_train.shape, y_test.shape)  # torch.Size([353]) torch.Size([89])

# with reshape, train loss goes down to 7
y_train = torch.tensor(y_train).float().reshape(-1,1)  # column vector
y_test = torch.tensor(y_test).float().reshape(-1,1)
print(y_train.shape, y_test.shape)  # torch.Size([353, 1]) torch.Size([89, 1])
########################################################################
history = train1(model2, X_train, X_test, y_train, y_test,
                 learning_rate=.02, nepochs=8000)
# Epoch 0 MSE train loss 29603.037 test loss 26998.922
# Epoch 800 MSE train loss 2133.840 test loss 3174.325
# Epoch 1600 MSE train loss 1423.420 test loss 4316.454
# Epoch 2400 MSE train loss 375.720 test loss 7257.883
# Epoch 3200 MSE train loss 120.477 test loss 9051.368
# Epoch 4000 MSE train loss 57.527 test loss 10240.634
# Epoch 4800 MSE train loss 31.486 test loss 10784.966
# Epoch 5600 MSE train loss 16.044 test loss 11113.780
# Epoch 6400 MSE train loss 8.490 test loss 11283.872
# Epoch 7200 MSE train loss 6.594 test loss 11503.454
# Epoch 8000 MSE train loss 3.513 test loss 11644.484
Suppose you don't reshape. Your model outputs y_pred with shape (batch_size, 1), but your y tensor has shape (batch_size,), missing the unit axis. That axis mismatch makes y broadcast against y_pred, which is wrong: when you compute (y_pred - y), you get a tensor of shape (batch_size, batch_size) that compares every y value against every y_pred value. torch.mean still reduces that matrix to a single scalar, so nothing crashes, but the objective is wrong: it is minimized when every prediction equals the mean of y, at which point the loss equals the variance of y. That is why your training loss plateaus around 6074 (roughly the variance of y_train) instead of continuing to fall.

When you do reshape, y has shape (batch_size, 1), and you get the correct loss computation, where (y_pred - y) has shape (batch_size, 1).
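To see concretely what the broadcast does to the values, not just the shapes, here is a minimal toy example (three made-up predictions and targets, not from your notebook):

import torch

y_pred = torch.tensor([[1.], [2.], [3.]])  # shape (3, 1), like the model output
y = torch.tensor([1., 2., 3.])             # shape (3,), missing the unit axis

diff = y_pred - y  # broadcasts (3, 1) against (3,) -> shape (3, 3)
print(diff)
# tensor([[ 0., -1., -2.],
#         [ 1.,  0., -1.],
#         [ 2.,  1.,  0.]])
# only the diagonal holds the differences you actually want; the
# off-diagonal entries compare each prediction with the wrong targets

And here is the same mismatch traced through a model like yours, shape by shape: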
import torch

ncols = 8
n_neurons = 150
model = torch.nn.Sequential(
    torch.nn.Linear(ncols, n_neurons),
    torch.nn.ReLU(),
    torch.nn.Linear(n_neurons, 1)  # model outputs a tensor of shape (batch_size, 1)
)

batch_size = 64
x = torch.randn(batch_size, ncols)
y = torch.randn(batch_size)  # y has shape (batch_size,)

y_pred = model(x)
print(y.shape, y_pred.shape)  # pred and target have different shapes
# > torch.Size([64]) torch.Size([64, 1])

loss = (y_pred - y).pow(2)
print(loss.shape)  # the subtraction broadcasts
# > torch.Size([64, 64])

y_reshaped = y.reshape(-1, 1)
print(y_reshaped.shape)  # after reshaping, the axes match
# > torch.Size([64, 1])

loss = (y_pred - y_reshaped).pow(2)
print(loss.shape)  # the correct elementwise loss shape
# > torch.Size([64, 1])
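As for avoiding the mistake in the future, two habits help: assert that prediction and target shapes match before computing the loss, and prefer the built-in MSE loss over a hand-rolled mean of squares, since torch.nn.functional.mse_loss emits a UserWarning when the target size differs from the input size instead of broadcasting silently. A minimal sketch (with made-up toy data, not your notebook's):

import torch
import torch.nn.functional as F

model = torch.nn.Linear(10, 1)
x = torch.randn(64, 10)
y = torch.randn(64).reshape(-1, 1)  # keep the target a column vector from the start

y_pred = model(x)

# habit 1: fail fast if the shapes ever drift apart
assert y_pred.shape == y.shape, f"shape mismatch: {y_pred.shape} vs {y.shape}"

# habit 2: the built-in loss warns on a target/input size mismatch,
# unlike torch.mean((y_pred - y)**2), which broadcasts silently
loss = F.mse_loss(y_pred, y)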