Multivariate time series forecasting using LSTM with some categorical features


I am currently working with time series data that looks like the following:

[image: sample of the time series data]

The data consists of 5 companies, 15 products (each company has 3-5 products), and 6 different regions.

Goal: build a single generalized forecasting model.

Question 1: Is building a single model the right approach?

I chose an LSTM for modeling.

Question 2: Is an LSTM the right choice? If not, which method suits this kind of data?

To train the LSTM, I use the past 120 days of data to forecast the next 30 days. As input I use the past 120 days of every feature except Sales, following this article, and the target is the next 30 days of Sales.

My approach

Data preprocessing (no need to read the whole class; the preprocess function shows the overall flow, and create_sequences shows how the inputs and targets are built):

import pickle

import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from torch.utils.data import DataLoader, TensorDataset

from src.utils.logger import get_logger

logger = get_logger()


class DataPreprocess:
    def __init__(self, data_path) -> None:
        self.df= pd.read_csv(data_path, index_col='Date', parse_dates=True)
        
    def preprocess(self, check_model):
        self.feature_extraction()
        self.encode_features()
        self.split_data()
        self.scale_features()
        train_loader, test_loader= self.get_dataloader(check_model)

        return train_loader, test_loader

    def get_sin(self, X, period):
        return np.sin(2*np.pi * (X/period))

    def get_cos(self, X, period):
        return np.cos(2*np.pi * (X/period))

    def feature_extraction(self):
        self.df['DayOfYear']= self.df.index.dayofyear
        self.df['DayOfWeek']= self.df.index.dayofweek
        self.df['Seconds']= self.df.index.map(pd.Timestamp.timestamp)

        # weekly cycle from day-of-week (day-of-year mod 7 drifts, since 365 % 7 != 0)
        self.df['Week Sin']= self.df['DayOfWeek'].apply(lambda x: self.get_sin(x, 7))
        self.df['Week Cos']= self.df['DayOfWeek'].apply(lambda x: self.get_cos(x, 7))

        # approximate monthly cycle
        self.df['Month Sin']= self.df['DayOfYear'].apply(lambda x: self.get_sin(x, 30))
        self.df['Month Cos']= self.df['DayOfYear'].apply(lambda x: self.get_cos(x, 30))

        # yearly cycle
        self.df['Year Sin']= self.df['DayOfYear'].apply(lambda x: self.get_sin(x, 365))
        self.df['Year Cos']= self.df['DayOfYear'].apply(lambda x: self.get_cos(x, 365))
        
        self.df.drop(['DayOfYear'], axis=1, inplace= True)

    def encode_features(self):
        categorical_features = ['Company', 'Product', 'Region']

        encoders= {}

        for col in categorical_features:
            le= LabelEncoder()
            self.df[col]= le.fit_transform(self.df[col])
            encoders[col]= le
    
        with open('artifacts/LSTM/encoders.pkl', 'wb') as file:
            pickle.dump(encoders, file)

    def split_data(self):
        # per (Product, Region) series: everything except the last 200 rows trains, the last 200 test
        self.train_df= self.df.groupby(['Product', 'Region']).head(-200).copy()
        self.test_df= self.df.groupby(['Product', 'Region']).tail(200).copy()

    def scale_features(self):
        standard_scaler_cols= ['Company', 'Product', 'Region', 'DayOfWeek', 'Seconds',
       'Week Sin', 'Week Cos', 'Month Sin', 'Month Cos', 'Year Sin',
       'Year Cos']
        
        standard_scaler= StandardScaler()
        min_max_scaler= MinMaxScaler()

        self.train_df[standard_scaler_cols]= standard_scaler.fit_transform(self.train_df[standard_scaler_cols])
        self.test_df[standard_scaler_cols]= standard_scaler.transform(self.test_df[standard_scaler_cols])

        self.train_df[['Sales']]= min_max_scaler.fit_transform(self.train_df[['Sales']])
        # transform (not fit_transform) the test set so it shares the scale fitted on train
        self.test_df[['Sales']]= min_max_scaler.transform(self.test_df[['Sales']])

        with open('artifacts/LSTM/standard_scaler.pkl', 'wb') as file:
            pickle.dump(standard_scaler, file)

        with open('artifacts/LSTM/min_max_scaler.pkl', 'wb') as file:
            pickle.dump(min_max_scaler, file)

    def create_sequences(self, df, past_window_size= 120, forecast_window_size= 30):

        X= []
        y= []

        logger.info(f'creating sequences with past window : {past_window_size}, forecast_window : {forecast_window_size}')

        # locate the target column explicitly instead of assuming Sales is column 0
        target_idx= df.columns.get_loc('Sales')
        feature_idx= [i for i in range(df.shape[1]) if i != target_idx]

        for prod, reg in df[['Product', 'Region']].drop_duplicates().values:
            df_to_np= df[(df['Product']==prod) & (df['Region']==reg)].to_numpy()

            for i in range(len(df_to_np)- past_window_size- forecast_window_size):
                # input: the past window of every feature except Sales
                X.append(df_to_np[i:i+past_window_size, feature_idx])
                # target: the next forecast_window_size Sales values
                y.append(df_to_np[i+past_window_size:i+past_window_size+forecast_window_size, target_idx])

        return np.array(X, dtype=float), np.array(y, dtype= float)

    
    def get_dataloader(self, check_model):
        X_train, y_train= self.create_sequences(self.train_df)
        X_test, y_test= self.create_sequences(self.test_df)

        X_train, y_train= torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float()
        X_test, y_test= torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float()

        if check_model:
            train_dataset= TensorDataset(X_train[:5000], y_train[:5000])
            test_dataset= TensorDataset(X_test[:1000], y_test[:1000])
        else:
            train_dataset= TensorDataset(X_train, y_train)
            test_dataset= TensorDataset(X_test, y_test)

        # shuffle the training windows; keep the test order fixed
        train_loader= DataLoader(train_dataset, batch_size=32, shuffle=True, drop_last=True, num_workers=4, pin_memory=True)
        test_loader= DataLoader(test_dataset, batch_size=32, drop_last=True, num_workers=4, pin_memory=True)

        return train_loader, test_loader

In the create_sequences function I build each input from the past 120 steps (X) and each target from the next 30 Sales values (y).
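As a quick sanity check on the windowing (a minimal sketch, assuming a DataPreprocess instance dp whose split/scale steps have already run):

X_train, y_train = dp.create_sequences(dp.train_df)

# with the 12 preprocessed columns (Sales + 11 features), expect:
#   X_train.shape == (n_windows, 120, 11)   # 11 features per step
#   y_train.shape == (n_windows, 30)        # 30-day Sales target
print(X_train.shape, y_train.shape)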

The model I use:

from torch import nn
import torch

class LSTM_Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers, dropout_prob=0.3):
        super(LSTM_Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers

        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True, num_layers=num_layers, dropout=dropout_prob)

        # feed-forward head on top of the final hidden state
        self.linear1 = nn.Linear(hidden_size, hidden_size * 2)
        self.batch_norm1 = nn.BatchNorm1d(hidden_size * 2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_prob)

        self.linear2 = nn.Linear(hidden_size * 2, output_size)

    def forward(self, X):
        # X: (batch, seq_len, input_size); start from zeroed hidden/cell states
        h0 = torch.zeros(self.num_layers, X.size(0), self.hidden_size).to(X.device)
        c0 = torch.zeros(self.num_layers, X.size(0), self.hidden_size).to(X.device)

        output, (hidden, cell) = self.lstm(X, (h0, c0))

        # summarise the sequence with the last layer's final hidden state
        out = self.linear1(hidden[-1, :, :])
        out = self.batch_norm1(out)
        out = self.relu(out)
        out = self.dropout(out)

        # project to the 30-step forecast
        out = self.linear2(out)

        return out
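Before training, a quick smoke test of the model's input/output contract can catch shape mismatches early (a sketch; 11 input features per step matches the preprocessing above, 30 outputs match the forecast window):

# hypothetical smoke test: a batch of 32 windows, 120 steps, 11 features
model = LSTM_Model(input_size=11, hidden_size=256, output_size=30, num_layers=2)
dummy = torch.randn(32, 120, 11)
print(model(dummy).shape)  # expect torch.Size([32, 30])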

Model training:


from tqdm import tqdm
from pathlib import Path
import sys
import torch
from torch import nn
from datetime import datetime
import os

parent_dir= Path(__file__).parent.parent.parent
sys.path.append(str(parent_dir))

from data_preprocess import DataPreprocess
from LSTM_model import LSTM_Model
from src.utils.logger import get_logger

logger= get_logger()

class LSTM:
    def __init__(self, input_size, hidden_size, output_size, num_layers, drop_out= 0.3) -> None:
        self.model = LSTM_Model(input_size, hidden_size, output_size, num_layers, drop_out)

    def train(self, epochs, train_loader, test_loader, load_model, save_every=10, model_path= None):
        self.loss_fn = nn.MSELoss()
        if load_model:
            self.model.load_state_dict(torch.load(model_path))
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.001, weight_decay=1e-5)

        # one artifacts directory per training run, stamped with the start time
        now = datetime.now()
        formatted_time = now.strftime("%d-%m-%y_%H-%M")
        os.makedirs(f'artifacts/LSTM/{formatted_time}', exist_ok=True)

        for epoch in range(epochs):
            self.model.train()
            train_loss = 0.0
            for X, y in tqdm(train_loader):
                self.optimizer.zero_grad() 
                outputs = self.model(X)
                loss = self.loss_fn(outputs, y)
                train_loss += loss.item()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
                self.optimizer.step() 
               
            avg_train_loss = train_loss / len(train_loader)

            print(f'Epoch {epoch+1} --- train loss {avg_train_loss}')

            self.model.eval()
            test_loss = 0.0
            with torch.no_grad():
                for X, y in tqdm(test_loader):
                    outputs = self.model(X) 
                    loss = self.loss_fn(outputs, y) 
                    test_loss += loss.item()

            avg_test_loss = test_loss / len(test_loader)

            print(f'Epoch {epoch+1} --- test loss {avg_test_loss}\n')

            with open(f'artifacts/LSTM/{formatted_time}/metrics.txt', 'a') as file: 
                file.write(f"Epoch {epoch+1} : \n\ttrain loss {avg_train_loss} \n\ttest loss {avg_test_loss}\n\n")

            if (epoch + 1) % save_every == 0:
                torch.save(self.model.state_dict(), f'artifacts/LSTM/{formatted_time}/lstm_epoch_{epoch+1}.pth')

    def predict(self, X, model_path):
        '''
        X: past 120 days records
        '''
        self.model.load_state_dict(torch.load(model_path))
        self.model.eval()
        preds= self.model(X).detach().numpy().flatten()
        
        return preds


if __name__=='__main__':
    data_preprocess= DataPreprocess(data_path= 'data/inputs/Corrected_and_Balanced_Time_Series_Data.csv')
    logger.info('Started Data Preprocessing')
    train_loader, test_loader= data_preprocess.preprocess(check_model= False)  
    logger.info('Done Data Preprocessing')
    # input_size must equal the per-step feature count (11 here, given the features built above)
    lstm= LSTM(input_size=11, hidden_size=256, output_size=30, num_layers=2, drop_out=0.5)
    lstm.train(epochs=10, train_loader=train_loader, test_loader=test_loader, load_model=False, save_every=1)

Results:

Epoch 1 :
train loss 0.004617819177448817
test loss 0.006365457516637564

Epoch 2 :
train loss 0.0013970815729702321
test loss 0.010996414257221789

Epoch 3 :
train loss 0.0023961294302583384
test loss 0.012776140605897776

Please point out where I am going wrong, or suggest a better approach.

python pytorch time-series lstm multivariate-time-series
1 Answer

I recently heard about a very interesting alternative to LSTMs or RNNs for time series data. You can convert each time series into an image with a technique called the Gramian Angular Field and then train a convolutional neural network on the images. I have heard they are easier to train than LSTMs, so perhaps you can try it. Markov Transition Fields are also used to turn time series into images for training convolutional networks. I have never tried this myself, but I have heard of people for whom LSTMs and RNNs did not work, and who then used Gramian Angular Fields and Markov Transition Fields as inputs to a convolutional neural network, and it worked.
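For reference, a minimal sketch of that idea; the answer does not name a library, but pyts is one that implements both transforms (the shapes and toy data below are assumptions for illustration):

import numpy as np
from pyts.image import GramianAngularField, MarkovTransitionField

# toy input: 100 univariate series of 120 steps each
# (e.g. the 120-day sales windows from the question)
series = np.random.rand(100, 120)

gaf = GramianAngularField(image_size=32, method='summation')
mtf = MarkovTransitionField(image_size=32, n_bins=8)

gaf_images = gaf.fit_transform(series)  # shape: (100, 32, 32)
mtf_images = mtf.fit_transform(series)  # shape: (100, 32, 32)

# stack the two encodings as channels and train a 2-channel CNN on the result
images = np.stack([gaf_images, mtf_images], axis=1)  # (100, 2, 32, 32)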
