LSTM 不断绘制纯直线，结果等于 0

Question

我尝试使用 Pytorch 中的 LSTM 模型来预测家庭消费电力任务，我尝试通过 MinMaxScaler() 添加归一化，并尝试将其重塑为输入的所需形状 [batch_size，sequence_length， num_features]，在这种情况下我有 3 个特征，并且也将序列长度设置为 1，问题是当我训练模型时，训练损失和测试损失相对较低，尽管模型绘制得不好直线不适合任何东西。这是训练数据的代码和 1 个测试

training_year = [2006, 2007, 2008]
testing_year = [2009, 2010]
training_df = copy.deepcopy(machine_learning_df.loc[machine_learning_df['year'].isin(training_year), :])
testing_df = copy.deepcopy(machine_learning_df.loc[machine_learning_df['year'].isin(testing_year), :])
features_list = ['date_ordinal', 'global_intensity', 'sub_metering3']
targeted_variable = 'global_active_power'
x_train = training_df[features_list].astype(int).values
y_train = training_df[targeted_variable].astype(int).values
x_test = testing_df[features_list].astype(int).values
y_test = testing_df[targeted_variable].astype(int).values

scaler = MinMaxScaler()

BATCH_SIZE = 32

x_train = scaler.fit_transform(x_train.reshape(-1, 3))
x_test = scaler.fit_transform(x_test.reshape(-1, 3))

x_train = torch.tensor(x_train).view(-1, 1, 3).type(torch.float32)
y_train = torch.tensor(y_train).type(torch.float32)
x_test = torch.tensor(x_test).view(-1, 1, 3).type(torch.float32)
y_test = torch.tensor(y_test).type(torch.float32)

train_dataset = TensorDataset(x_train, y_train)
train_dataloader = DataLoader(dataset=train_dataset, 
                              shuffle=True,
                              batch_size=BATCH_SIZE)
test_dataset = TensorDataset(x_test, y_test)
test_dataloader = DataLoader(dataset=test_dataset,
                             shuffle=False,
                             batch_size=BATCH_SIZE)

#Create the neural network 
class LSTMmodel(nn.Module): 
    def __init__(self,
                 output_size, 
                 input_size,
                 hidden_size,
                 num_layers): 
        super(LSTMmodel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.stacked_layer = nn.Sequential(
            nn.Linear(in_features=hidden_size, out_features=512), 
            nn.ReLU(), 
            nn.Dropout(p=0.2), 
            nn.Linear(in_features=512, out_features=256), 
            nn.ReLU(),
            nn.Dropout(p=0.2), 
            nn.Linear(in_features=256, out_features=125), 
            nn.ReLU(), 
            nn.Dropout(p=0.2),
            nn.Linear(in_features=125, out_features=64), 
            nn.ReLU(),
            nn.Dropout(p=0.2), 
            nn.Linear(in_features=64, out_features=output_size), 
            nn.ReLU()
        )
    
    def forward(self, x): 
        out, _ = self.lstm(x)
        out = out[:, -1, :]
        out = self.stacked_layer(out)
        return out 

#Initialize the model 
model = LSTMmodel(input_size=3, output_size=1, hidden_size=16, num_layers=3)
loss_fn = nn.MSELoss() 
learning_rate = 0.1
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

#Create the traning and testing model 
clip_value = 1
epoches = 1
for epoch in range(epoches):
    print(f'Epoch {epoch + 1}')
    model.train() 
    for batch, (train_features, train_label) in enumerate(train_dataloader): 
        train_pred = model(train_features)
        train_loss = loss_fn(train_pred, train_label.unsqueeze(dim=1))
        train_loss.backward() 
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip_value)
        optimizer.step() 
        optimizer.zero_grad() 
        if batch % 4000 == 0: 
            print(f'Looked at {batch * len(train_features)}/{len(train_dataloader.dataset)} samples')
        
    model.eval() 
    total_test_loss = 0
    test_feature_list = []
    test_pred_list = []
    with torch.inference_mode(): 
        for test_feature, test_labels in test_dataloader: 
            test_pred = model(test_feature)
            test_loss = loss_fn(test_pred, test_labels.unsqueeze(dim=1))
            test_pred_list.extend(test_pred.detach().numpy())
            test_feature_list.extend(test_labels.detach().numpy())
            total_test_loss += test_loss 
        print(f'Train loss {train_loss.item():.4f} | Test loss {test_loss.item():.4f} | Total test loss {total_test_loss}')

test_feature_array = np.array(test_feature_list)
test_pred_array = np.array(test_pred_list)
predicted_df = pd.DataFrame({
    'date_ordinal': testing_df['date_ordinal'],
    'global_active_power': test_pred_array.flatten()
})

def plotting_predicted(training_data, testing_data, predicted_data=None): 
    plt.figure(figsize=(12, 6))
    #Plot the training data 
    sns.lineplot(data=training_data, x='date_ordinal', y='global_active_power', label='Training data')

    #Plot the testing data
    sns.lineplot(data=testing_data, x='date_ordinal', y='global_active_power', label='Testing data')

    if predicted_data is not None: 
        sns.lineplot(data=predicted_data, x='date_ordinal', y='global_active_power', label='Predicted data')

    plt.xlabel('Date')
    plt.xticks(rotation='vertical')
    plt.ylabel('Total consumptions')
    plt.legend()
    plt.show()
plotting_predicted(training_df, testing_df, predicted_df)

Epoch 1
Looked at 0/1070566 samples
Looked at 128000/1070566 samples
Looked at 256000/1070566 samples
Looked at 384000/1070566 samples
Looked at 512000/1070566 samples
Looked at 640000/1070566 samples
Looked at 768000/1070566 samples
Looked at 896000/1070566 samples
Looked at 1024000/1070566 samples
Train loss 1.0000 | Test loss 0.0769 | Total test loss 39069.015625

我尝试将大小重塑为 (-1, 1, 3) 或 (-1, 3, 1)，我不太确定我理解的哪一部分导致了预测问题

Answer 1

模型末尾的

nn.ReLU()

导致其将所有负输出值归零。

LSTM 不断绘制纯直线，结果等于 0

问题描述投票：0回答：1

1个回答

最新问题

LSTM 不断绘制纯直线，结果等于 0

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1