I'm currently trying to convert some DNN code to RNN code, but I've run into a few problems. In the DNN code, the observe class takes an input tensor and applies a series of linear layers and ELU activations to produce an output tensor. I tried to convert this class to an RNN by using the nn.RNN module followed by a linear layer and an ELU activation. However, when I run the RNN code I get errors related to tensor sizes. Could someone help me understand what I might be doing wrong in my RNN implementation and suggest any changes to fix these issues? I've included both the DNN and RNN code below for reference. Thanks in advance!
## DNN code (works fine!)
import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

class observe(nn.Module):
    def __init__(self, trunc, N):
        super(observe, self).__init__()
        # NN arch: stack of linear layers with ELU activations
        self.obs = nn.Sequential(
            nn.Linear(N, 500),
            nn.ELU(),
            nn.Linear(500, 500),
            nn.ELU(),
            nn.Linear(500, 500),
            nn.ELU(),
            nn.Linear(500, 500),
            nn.ELU(),
            nn.Linear(500, trunc),
            nn.ELU()
        )

    def forward(self, y):
        # Compute the dictionary of observables and append it to the input
        dic = self.obs(y)
        return torch.cat((y, dic), dim=-1)
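For context, the DNN class gives the shapes I expect. Here is a quick standalone sanity check with dummy data (hypothetical, not part of the script, using the trunc=250 and dh=64 values from the setup below):

# Hypothetical sanity check with dummy data: the output should carry
# N + trunc = 64 + 250 = 314 features per sample.
f = observe(250, 64).double()
y = torch.randn(512, 64, dtype=torch.float64)
print(f(y).shape)  # torch.Size([512, 314])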
###############################################################################
# Functions
###############################################################################
def get_batch(X, X_dt):
    [M, N] = X.shape
    # Sample args.batch_size rows without replacement
    s = torch.from_numpy(np.random.choice(np.arange(M, dtype=np.int64), args.batch_size, replace=False))
    batch_x = X[s]     # (batch_size, N)
    batch_y = X_dt[s]  # (batch_size, N)
    return batch_x, batch_y
###########################################################################
# Initialize NN for learning the RHS and setup optimization parms
###########################################################################
iters = 1
dh = 64
func = observe(250, dh).double()
optimizer = optim.Adam(func.parameters(), lr=1e-5)
#optimizer = optim.Adam(func.parameters(), lr=1e-3, weight_decay=10**-5) #optimizer = optim.RMSprop(func.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=int(iters/3), gamma=0.9999)
end = time.time()
err = []
test_freq = 1
freq = int(iters/10)
ii = 0
###########################################################################
# Optimization iterations
###########################################################################
S = []
for itr in range(1, iters + 1):
    true_y, true_yf = get_batch(true_y_long, true_yf_long)
    print(true_y.shape)   # torch.Size([512, 64])
    print(true_yf.shape)  # torch.Size([512, 64])
    avg_loss = 0
    optimizer.zero_grad()
    pred = func(true_y)    # torch.Size([512, 314])
    predf = func(true_yf)  # torch.Size([512, 314])
    print(pred.shape)
    print(predf.shape)
    # Loss: || (I - pred @ pinv(pred)) @ predf ||
    loss = torch.linalg.norm((torch.eye(512, requires_grad=True) - pred @ torch.linalg.pinv(pred)) @ predf)
    avg_loss += loss.item()
    loss.backward()  # Computes the gradient of the loss w.r.t. the network parameters
    optimizer.step()
    scheduler.step()
    if itr % test_freq == 0:
        with torch.no_grad():
            err.append(avg_loss)
            #output('Iter {:04d} | Total Loss {:.6f} | Time {:.6f} | LR {:.6f}'.format(itr, loss.item(), time.time() - end, optimizer.param_groups[0]['lr'])+'\n')
            print('Iter {:04d} | Total Loss {:.10f} | Time {:.6f} | LR {:.6f}'.format(itr, loss.item(), time.time() - end, optimizer.param_groups[0]['lr']))
            ii += 1
    end = time.time()
## RNN code (has the size error)
class observe(nn.Module):
    def __init__(self, trunc, N):
        super(observe, self).__init__()
        self.rnn = nn.RNN(input_size=N, hidden_size=512-64, num_layers=2, batch_first=True)
        self.linear = nn.Linear(512-64, trunc)
        self.activation = nn.ELU()
        # self.linear2 = nn.Linear(500, trunc)

    def forward(self, y_seq):
        # y_seq is expected as (batch, seq_len, N) because batch_first=True
        output, _ = self.rnn(y_seq)
        # Use the last time step, apply ELU, then the linear head
        output = self.linear(self.activation(output[:, -1, :]))
        return torch.cat((y_seq[:, -1, :], output), dim=-1)
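If I test this RNN class in isolation with a (batch, seq_len, N) input, I actually get the shape I want (a hypothetical standalone check, not part of the script), so I suspect the batch reshaping below is where things go wrong:

# Hypothetical check: with batch_first=True the class expects (batch, seq, input_size)
f = observe(250, 64).double()
y_seq = torch.randn(512, 1, 64, dtype=torch.float64)
print(f(y_seq).shape)  # torch.Size([512, 314])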
###############################################################################
# Functions
###############################################################################
def get_batch(X, X_dt):
    [M, N] = X.shape
    s = torch.from_numpy(np.random.choice(np.arange(M, dtype=np.int64), args.batch_size, replace=False))
    batch_x = X[s]     # (batch_size, N)
    batch_y = X_dt[s]  # (batch_size, N)
    # Reshape the batches to (batch_size, sequence_length=1, input_size)
    batch_x = batch_x.unsqueeze(1)
    batch_y = batch_y.unsqueeze(1)
    return batch_x, batch_y
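After the unsqueeze, a batch should already have the (batch, seq, input) layout that nn.RNN with batch_first=True expects (hypothetical check):

x = torch.randn(512, 64)     # hypothetical batch of 512 samples, 64 features
print(x.unsqueeze(1).shape)  # torch.Size([512, 1, 64])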
###########################################################################
# Initialize NN for learning the RHS and setup optimization parms
###########################################################################
iters = 10000
dh = 64
func = observe(250, dh).double()
optimizer = optim.Adam(func.parameters(), lr=1e-5)
#optimizer = optim.Adam(func.parameters(), lr=1e-3, weight_decay=10**-5) #optimizer = optim.RMSprop(func.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=int(iters/3), gamma=0.9999)
end = time.time()
err = []
test_freq = 1
freq = int(iters/10)
ii = 0
###########################################################################
# Optimization iterations
###########################################################################
S = []
for itr in range(1, iters + 1):
    true_y, true_yf = get_batch(true_y_long, true_yf_long)
    print(true_y.shape)
    print(true_yf.shape)
    avg_loss = 0
    optimizer.zero_grad()
    # Reshape the input tensors to have shape (batch_size, sequence_length, input_size)
    true_y = true_y.transpose(0, 1)
    true_yf = true_yf.transpose(0, 1)
    pred = func(true_y)
    predf = func(true_yf)
    loss = torch.linalg.norm((torch.eye(512, requires_grad=True) - pred @ torch.linalg.pinv(pred)) @ predf)
    avg_loss += loss.item()
    loss.backward()  # Computes the gradient of the loss w.r.t. the network parameters
    optimizer.step()
    scheduler.step()
    # Print out the loss and the time the computation took
    if itr % test_freq == 0:
        with torch.no_grad():
            err.append(avg_loss)
            print('Iter {:04d} | Total Loss {:.10f} | Time {:.6f} | LR {:.6f}'.format(itr, loss.item(), time.time() - end, optimizer.param_groups[0]['lr']))
            ii += 1
    end = time.time()
I expect pred and predf to have size [512, 314], but instead they come out as torch.Size([1, 314]). I'm not sure why.
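To make the size issue concrete, here is a hypothetical standalone trace of what the script does to one batch (same dimensions as above):

batch = torch.randn(512, 64, dtype=torch.float64)  # (batch_size, N) as sampled in get_batch
batch = batch.unsqueeze(1)     # torch.Size([512, 1, 64]) -- (batch, seq, input)
batch = batch.transpose(0, 1)  # torch.Size([1, 512, 64]) -- batch dim becomes 1
f = observe(250, 64).double()
print(f(batch).shape)          # torch.Size([1, 314]) -- matches what I see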