我正在尝试理解 LSTM,并想实现一个简单的示例,如果序列中“1”的数量为奇数,则将序列分类为“0”,如果“1”的数量为偶数,则将序列分类为“1”。这是我的数据生成和训练例程:
import torch
import numpy as np
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from Dataset import LSTMDataset # Custom Dataset
from Network import LSTMNet # Custom Network
if __name__ == "__main__":
    # Build a parity dataset: random 0/1 sequences of fixed length, labelled
    # 1 when the count of ones is even and 0 when it is odd.
    numSamples = 1000
    sampleLength = 5
    samples = np.empty((numSamples, sampleLength), dtype=np.float32)
    labels = np.empty(numSamples, dtype=np.float32)
    for s in range(numSamples):
        sample = np.random.choice([0, 1], size=sampleLength)
        samples[s] = sample
        even = np.count_nonzero(sample == 1) % 2 == 0
        labels[s] = int(even)

    X_train, X_test, y_train, y_test = train_test_split(
        samples, labels, test_size=0.25, random_state=42
    )
    trainingSet = LSTMDataset(X_train, y_train)
    testSet = LSTMDataset(X_test, y_test)
    training_loader = DataLoader(trainingSet, batch_size=1, shuffle=True)
    validation_loader = DataLoader(testSet, batch_size=1, shuffle=False)

    # BUG FIX: the constructor parameter is named `sequenceLength`
    # (see the LSTMNet definition); the original `inputSize=` keyword
    # would raise a TypeError.
    model = LSTMNet(sequenceLength=sampleLength)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    loss_fn = torch.nn.BCELoss()

    for epoch in range(10):
        yPredicted = []
        yTruth = []
        for i, data in enumerate(training_loader):
            # `batch_labels` avoids shadowing the dataset-level `labels` array.
            inputs, batch_labels = data
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, batch_labels)
            loss.backward()
            optimizer.step()
            yTruth.append(int(batch_labels.item()))
            yPredicted.append(int(torch.round(outputs).item()))
        # Training accuracy over the epoch (predictions thresholded at 0.5).
        accuracy = accuracy_score(yTruth, yPredicted)
        print(f"Accuracy: {accuracy:.2f}")
我的数据集和网络:
class LSTMDataset(Dataset):
    """Wraps feature/label arrays as per-timestep sequences for an LSTM.

    Each item is a (sequence_length, 1) column vector of features plus a
    length-1 label vector (the shape BCELoss expects).
    """

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        # One label per sample.
        return len(self.y)

    def __getitem__(self, idx):
        # Reshape to a column vector so the LSTM sees one feature per step.
        features = self.x[idx].reshape(-1, 1)
        target = self.y[idx].reshape(1)
        return features, target
class LSTMNet( nn.Module ):
    """LSTM classifier for fixed-length binary sequences (the parity task).

    Expects input of shape (batch, sequenceLength, 1); returns a sigmoid
    probability of shape (batch, 1).
    """

    def __init__( self, sequenceLength ):
        # NOTE(review): the training script calls LSTMNet(inputSize=...) --
        # that keyword does not match this parameter name; verify the caller.
        super().__init__()
        self.hidden_size = 10
        # One scalar feature per timestep, two stacked LSTM layers.
        self.lstm = nn.LSTM( input_size=1, hidden_size=self.hidden_size, num_layers=2, batch_first=True )
        self.net = nn.Sequential(
            nn.Flatten(),
            # NOTE(review): ReLU applied directly to the LSTM's tanh-based
            # outputs zeroes out roughly half the values -- suspected cause of
            # the stalled training accuracy discussed below.
            nn.ReLU(),
            nn.Linear( sequenceLength * self.hidden_size, 1 ),
            nn.Sigmoid()
        )

    def forward(self, x):
        # Use the full per-timestep output sequence (not just the final state);
        # the hidden/cell state tuple is discarded.
        x, _ = self.lstm( x )
        x = self.net( x )
        return x
但不幸的是,我的训练准确率从未超过 53%。有人对我做错了什么有任何提示吗?
我的网络的输入形状是 (1, 5, 1)。我想把序列元素一个接一个地输入网络,这就是我选择 (1, 5, 1) 而不是 (1, 1, 5) 的原因。
您将一堆 0 值直接放入网络中。任何值乘以 0 都是 0。0 会破坏模型中的信号。用学习的嵌入替换输入
class LSTMNet( nn.Module ):
    """Embedding + LSTM classifier for fixed-length 0/1 sequences.

    Input: float tensor of shape (batch, sequenceLength, 1) holding 0/1
    values. Output: sigmoid probability of shape (batch, 1).
    """

    def __init__( self, sequenceLength ):
        super().__init__()
        self.hidden_size = 10
        # Learned embedding for the two token values {0, 1}: feeding raw 0s
        # straight into the LSTM destroys the signal (anything times 0 is 0).
        self.embedding = nn.Embedding(2, self.hidden_size)
        self.lstm = nn.LSTM( input_size=self.hidden_size, hidden_size=self.hidden_size,
                             num_layers=1, batch_first=True )
        self.net = nn.Sequential(
            nn.Flatten(),
            # Linear layer between LSTM and ReLU: LSTM outputs come from tanh,
            # so about half are negative; a bare ReLU would discard them.
            nn.Linear( sequenceLength * self.hidden_size, sequenceLength * self.hidden_size ),
            nn.ReLU(),
            nn.Linear( sequenceLength * self.hidden_size, 1 ),
            nn.Sigmoid()
        )

    def forward(self, x):
        # BUG FIX: squeeze only the trailing feature axis. A bare squeeze()
        # also removes a batch dimension of size 1, collapsing (1, L, 1) to
        # (L,); the LSTM then runs unbatched and Flatten/Linear crash for
        # batch_size == 1 -- exactly the batch size used in the question.
        x = self.embedding(x.squeeze(-1).long())
        x, _ = self.lstm( x )
        x = self.net( x )
        return x
该模型添加了嵌入层。我还在顺序部分添加了另一个线性层。严格来说,这一层是可选的,但它大大提高了收敛速度。LSTM 的输出来自 tanh 函数,这意味着大约一半的值低于 0。直接执行 LSTM -> ReLU 会丢弃这些负值。模型可以对此进行补偿,但在 LSTM 和 ReLU 之间加一个线性层能让它学习得更快。
话虽如此,您的模型对序列长度进行了硬编码。在这种情况下,一开始就使用 LSTM 并没有多大意义——LSTM 更适合可变序列长度的任务。如果序列长度是固定的,您可以只使用 MLP:
class MLPNet( nn.Module ):
    """Plain MLP baseline for classifying fixed-length binary sequences.

    Flattens the input to (batch, sequenceLength) and maps it through one
    hidden layer to a sigmoid probability of shape (batch, 1).
    """

    def __init__(self, sequenceLength):
        super().__init__()
        hidden = sequenceLength
        self.net = nn.Sequential(
            nn.Flatten(),
            nn.Linear(sequenceLength, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        return self.net(x)