I am trying to build a generative recurrent GAN architecture for multivariate time-series data. This is my model's discriminator:
from torchgan.models import Generator, Discriminator
import torch
import torch.nn as nn


class RGANDiscriminator(Discriminator):
    def __init__(self,
                 sequence_length,
                 input_size,
                 hidden_size=None,
                 num_layers=1,
                 dropout=0,
                 last_layer=None,
                 device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
                 **kwargs):
        hidden_size = hidden_size or input_size
        self.device = device
        self.input_size = input_size
        self.sequence_length = sequence_length
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.label_type = "none"
        # Set kwargs (might override the attributes above)
        for key, value in kwargs.items():
            setattr(self, key, value)
        super(RGANDiscriminator, self).__init__(self.input_size,
                                                self.label_type)
        # Build RNN layer
        self.rnn = nn.LSTM(input_size=input_size,
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(hidden_size, 1)
        self.last_layer = last_layer
        # Initialize all weights.
        self.rnn.apply(init_weights)
        nn.init.xavier_normal_(self.linear.weight)

    def forward(self, x):
        h0 = torch.randn((self.num_layers, x.size(0), self.hidden_size)).to(self.device)
        c0 = torch.randn((self.num_layers, x.size(0), self.hidden_size)).to(self.device)
        print(f"input {x.shape}")
        print(f"x: {x}")
        # Infer each sequence's unpadded length from the last non-zero
        # entry in its first feature channel, then pack the batch.
        length = torch.LongTensor([torch.max((x[i, :, 0] != 0).nonzero()).item() + 1
                                   for i in range(x.shape[0])])
        packed = nn.utils.rnn.pack_padded_sequence(
            x, length, batch_first=True, enforce_sorted=False
        )
        out_packed, (_, _) = self.rnn(packed, (h0, c0))
        y, _ = nn.utils.rnn.pad_packed_sequence(out_packed, batch_first=True)
        y = self.dropout(y)
        y = self.linear(y)
        return y if self.last_layer is None else self.last_layer(y)
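For context, the forward pass infers each sequence's true length from the position of the last non-zero value in its first feature channel and packs the batch before the LSTM. In isolation, and with made-up shapes, that step works like this minimal sketch:

import torch
import torch.nn as nn

x = torch.zeros(3, 10, 4)       # 3 zero-padded sequences, 10 time steps, 4 features
x[0, :7] = torch.randn(7, 4)    # true lengths: 7, 5, 10
x[1, :5] = torch.randn(5, 4)
x[2, :] = torch.randn(10, 4)

# Index of the last non-zero entry in feature 0, plus one, is the unpadded length.
# Caveat: if feature 0 of a sequence is all zeros, nonzero() is empty and torch.max raises.
length = torch.LongTensor([torch.max((x[i, :, 0] != 0).nonzero()).item() + 1
                           for i in range(x.shape[0])])
print(length)                   # tensor([ 7,  5, 10])

packed = nn.utils.rnn.pack_padded_sequence(x, length, batch_first=True, enforce_sorted=False)
rnn = nn.LSTM(input_size=4, hidden_size=8, batch_first=True)
out_packed, _ = rnn(packed)
y, _ = nn.utils.rnn.pad_packed_sequence(out_packed, batch_first=True)
print(y.shape)                  # torch.Size([3, 10, 8])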
And this is my model's training routine:
def train(self, epochs, writer_frequency=1, saver_frequency=20):
    avg_mmd = []
    for epoch in range(epochs):
        mmd = []
        for batch_idx, (data_attribute, data_feature) in enumerate(self.real_train_dl):
            data_attribute = data_attribute.to(self.device)
            input_feature = data_feature.to(self.device)
            batch_size = data_attribute.shape[0]

            ### Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
            noise = gen_noise((batch_size, self.sequence_length[0], self.noise_dim)).to(self.device)
            print(f"Noise:{noise.shape}")
            print(f"data attribute {data_attribute.shape}")
            noise = torch.cat((data_attribute, noise), dim=2)
            print(f"noise again : {noise.shape}")
            input_feature = torch.cat((data_attribute, input_feature), dim=2)
            print(f"input_feature : {input_feature.shape}")
            fake = self.generator(noise)
            print(f"fake :{fake.shape}")
            # Zero-pad the generated sequences along the time axis so they
            # match the length of the real sequences.
            x = fake.clone()
            x = x.permute(0, 2, 1)
            padded = nn.ConstantPad1d((0, input_feature.shape[1] - fake.shape[1]), 0)(x)
            x = padded.permute(0, 2, 1)
            print(f"new fake :{x.shape}")
            mmd.append(calculate_mmd_rbf(torch.mean(fake, dim=0).detach().cpu().numpy(),
                                         torch.mean(data_feature, dim=0).detach().cpu().numpy()))
            fake = torch.cat((data_attribute, x), dim=2)
            disc_real = self.discriminator(input_feature).view(-1)
            lossD_real = self.criterion(disc_real, torch.ones_like(disc_real))
            disc_fake = self.discriminator(fake).view(-1)
            lossD_fake = self.criterion(disc_fake, torch.zeros_like(disc_fake))
            lossD = (lossD_real + lossD_fake) / 2
            self.discriminator.zero_grad()
            lossD.backward(retain_graph=True)
            self.optimizer_dis.step()

            ### Train Generator: min log(1 - D(G(z))) <-> max log(D(G(z)))
            output = self.discriminator(fake).view(-1)
            lossG = self.criterion(output, torch.ones_like(output))
            self.generator.zero_grad()
            lossG.backward()
            self.optimizer_gen.step()
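As an aside, the permute / ConstantPad1d / permute sequence above pads fake along the time dimension; the same thing can be done in a single call with torch.nn.functional.pad, which pads dimensions starting from the last one. A minimal equivalent sketch using the shapes from my logs:

import torch
import torch.nn.functional as F

fake = torch.randn(40, 340, 12)   # (batch, time, features), as in the log output
target_len = 382

# The pad tuple is (features_left, features_right, time_left, time_right):
# leave the feature dim alone, append zeros after the last time step.
x = F.pad(fake, (0, 0, 0, target_len - fake.shape[1]))
print(x.shape)                    # torch.Size([40, 382, 12])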
And here is the error message:
INFO:config_logger:Batch Size: 40
INFO:config_logger:Noise Dimension: 5
INFO:config_logger:d_rounds: 1
INFO:config_logger:g_rounds: 1
INFO:config_logger:Device: cuda:0
INFO:config_logger:Input Dimension: 14
INFO:config_logger:Output Dimension: 12
INFO:config_logger:Sequence Length: (382,)
Noise:torch.Size([40, 382, 5])
data attribute torch.Size([40, 382, 14])
noise again : torch.Size([40, 382, 19])
input_feature : torch.Size([40, 382, 26])
fake :torch.Size([40, 340, 12])
new fake :torch.Size([40, 382, 12])
input torch.Size([40, 382, 26])
input torch.Size([40, 382, 26])
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-2-5cea213d3975> in <module>
676 time_logging_file=time_logging_file, batch_size=batch_size,
677 config_logging_file=config_logging_file)
--> 678 trainer.train(epochs=epoch, writer_frequency=1, saver_frequency=save_frequency)
8 frames
<ipython-input-2-5cea213d3975> in train(self, epochs, writer_frequency, saver_frequency)
592 disc_real = self.discriminator(input_feature).view(-1)
593 lossD_real = self.criterion(disc_real, torch.ones_like(disc_real))
--> 594 disc_fake = self.discriminator(fake).view(-1)
595 lossD_fake = self.criterion(disc_fake, torch.zeros_like(disc_fake))
596 lossD = (lossD_real + lossD_fake) / 2
/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []
<ipython-input-2-5cea213d3975> in forward(self, x)
370 c0 = torch.randn((self.num_layers, x.size(0), self.hidden_size)).to(self.device)
371 print(f"input {x.shape}")
--> 372 print(f"x: {x}")
373 length = torch.LongTensor([torch.max((x[i,:,0]!=0).nonzero()).item()+1 for i in range(x.shape[0])])
374 packed = nn.utils.rnn.pack_padded_sequence(
/usr/local/lib/python3.8/dist-packages/torch/_tensor.py in __format__(self, format_spec)
857 if self.dim() == 0 and not self.is_meta and type(self) is Tensor:
858 return self.item().__format__(format_spec)
--> 859 return object.__format__(self, format_spec)
860
861 @_handle_torch_function_and_wrap_type_error_to_not_implemented
/usr/local/lib/python3.8/dist-packages/torch/_tensor.py in __repr__(self, tensor_contents)
425 )
426 # All strings are unicode in Python 3.
--> 427 return torch._tensor_str._str(self, tensor_contents=tensor_contents)
428
429 def backward(
/usr/local/lib/python3.8/dist-packages/torch/_tensor_str.py in _str(self, tensor_contents)
635 with torch.no_grad():
636 guard = torch._C._DisableFuncTorch()
--> 637 return _str_intern(self, tensor_contents=tensor_contents)
/usr/local/lib/python3.8/dist-packages/torch/_tensor_str.py in _str_intern(inp, tensor_contents)
566 tensor_str = _tensor_str(self.to_dense(), indent)
567 else:
--> 568 tensor_str = _tensor_str(self, indent)
569
570 if self.layout != torch.strided:
/usr/local/lib/python3.8/dist-packages/torch/_tensor_str.py in _tensor_str(self, indent)
326 )
327 else:
--> 328 formatter = _Formatter(get_summarized_data(self) if summarize else self)
329 return _tensor_str_with_formatter(self, indent, summarize, formatter)
330
/usr/local/lib/python3.8/dist-packages/torch/_tensor_str.py in __init__(self, tensor)
113
114 else:
--> 115 nonzero_finite_vals = torch.masked_select(
116 tensor_view, torch.isfinite(tensor_view) & tensor_view.ne(0)
117 )
RuntimeError: numel: integer multiplication overflow
I would appreciate it if someone could help me understand why I am getting this error.
An overflow means that the values produced by your operations have blown up to the point where the result takes up more memory than the program can handle, and execution cannot continue. In other words, you are running into numerical instability. It is typically caused by exploding gradients, and there are mathematical tricks to keep the overflow/underflow from happening:
Here is an article that explains this in more depth.
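One standard such trick is gradient clipping, which rescales the gradients before each optimizer step. A minimal sketch (the linear model and data here are placeholders, not your RGAN):

import torch
import torch.nn as nn

model = nn.Linear(10, 1)                        # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
x, target = torch.randn(8, 10), torch.randn(8, 1)

loss = nn.functional.mse_loss(model(x), target)
optimizer.zero_grad()
loss.backward()
# Rescale all gradients so their combined norm is at most 1.0,
# which prevents a single step from blowing up the weights.
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
optimizer.step()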
Another possibility is that a class label is larger than the number of classes present in the dataset. E.g., you have the CIFAR10 dataset with 10 classes, but somewhere in the dataset there is a class label of 10 or more, outside the valid range 0-9.
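A quick way to rule that case out is to scan the labels once before training. A minimal sketch, where dataset is a hypothetical iterable of (x, y) pairs:

import torch

num_classes = 10
labels = torch.tensor([y for _, y in dataset])  # 'dataset' is a hypothetical (x, y) dataset
assert 0 <= labels.min() and labels.max() < num_classes, \
    f"labels must lie in [0, {num_classes - 1}], found [{labels.min()}, {labels.max()}]"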