Converting a DNN to an RNN, NN shape error (PyTorch)


I am currently trying to convert DNN code to RNN code, but I have run into some problems. In the DNN code, the observe class takes an input tensor and applies a series of linear layers with ELU activations to produce an output tensor. I tried to convert this class to an RNN by using the nn.RNN module followed by a linear layer and an ELU activation. However, when I run the RNN code I get errors related to tensor sizes. Could someone help me understand what I might be doing wrong in my RNN implementation and suggest any changes to fix it? I have included both the DNN and RNN code below for reference. Thanks in advance!
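For reference, nn.RNN with batch_first=True expects input of shape (batch, seq_len, input_size) and returns output of shape (batch, seq_len, hidden_size). A minimal shape check with the sizes I use (hidden_size = 512-64 = 448):

import torch
import torch.nn as nn

# Minimal shape check for nn.RNN with batch_first=True (sizes taken from the code below)
rnn = nn.RNN(input_size=64, hidden_size=448, num_layers=2, batch_first=True)
x = torch.randn(512, 1, 64)   # (batch=512, seq_len=1, input_size=64)
out, h = rnn(x)
print(out.shape)              # torch.Size([512, 1, 448])  -> (batch, seq_len, hidden_size)
print(h.shape)                # torch.Size([2, 512, 448])  -> (num_layers, batch, hidden_size)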

## DNN code (works fine!)


class observe(nn.Module):
    
    def __init__(self,trunc,N):
        super(observe, self).__init__()
        # NN Arch
        self.obs = nn.Sequential(
            nn.Linear(N, 500),
            nn.ELU(),
            nn.Linear(500, 500),
            nn.ELU(),  
            nn.Linear(500, 500),
            nn.ELU(),    
            nn.Linear(500, 500),
            nn.ELU(),                                       
            nn.Linear(500, trunc),
            nn.ELU()
        )
        
    def forward(self, y):
        # Compute the dictionary
        dic=self.obs(y)

        return torch.cat((y, dic), dim=-1)
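A quick sanity check of this DNN version (a minimal sketch using the sizes from the training loop below, dh=64 and trunc=250) confirms the expected [512, 314] output:

func = observe(250, 64).double()
y = torch.randn(512, 64, dtype=torch.float64)
print(func(y).shape)   # torch.Size([512, 314]) = (batch, 64 + 250)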


###############################################################################
# Functions
###############################################################################
    
def get_batch(X,X_dt ):
    [M,N]=X.shape
    s = torch.from_numpy(np.random.choice(np.arange(M, dtype=np.int64), args.batch_size, replace=False))

    batch_x = X[s]  # (batch_size, N)
    batch_y = X_dt[s]  # (batch_size, N)


    return batch_x,  batch_y

###########################################################################
# Initialize NN for learning the RHS and set up optimization params
###########################################################################
iters=1
dh=64
func = observe(250,dh).double()
optimizer = optim.Adam(func.parameters(),lr=1e-5)
#optimizer = optim.Adam(func.parameters(), lr=1e-3,weight_decay=10**-5) #optimizer = optim.RMSprop(func.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=int(iters/3), gamma=0.9999)
end = time.time()

err=[]
test_freq=1
freq=int(iters/10)
ii = 0
###########################################################################
# Optimization iterations
###########################################################################
S=[]
for itr in range(1, iters + 1):

    true_y, true_yf = get_batch(true_y_long, true_yf_long)
    print(true_y.shape)  #torch.Size([512, 64])
    print(true_yf.shape) #torch.Size([512, 64])

    avg_loss=0

    optimizer.zero_grad()
    pred=func(true_y)  #torch.Size([512, 314])
    predf=func(true_yf)  #torch.Size([512, 314])

    print(pred.shape)
    print(predf.shape)

    loss = torch.linalg.norm((torch.eye(512, requires_grad=True) - pred@torch.pinverse(pred))@predf)
    avg_loss+=loss.item()
    loss.backward()  # Computes the gradient of the loss w.r.t. the network parameters
    optimizer.step()
    scheduler.step()

    if itr % test_freq == 0:
        with torch.no_grad():
            err.append(avg_loss)
            #output('Iter {:04d} | Total Loss {:.6f} | Time {:.6f} | LR {:.6f}'.format(itr, loss.item(),time.time() - end,optimizer.param_groups[0]['lr'])+'\n')
            print('Iter {:04d} | Total Loss {:.10f} | Time {:.6f} | LR {:.6f}'.format(itr, loss.item(),time.time() - end,optimizer.param_groups[0]['lr']))
            ii += 1

end = time.time()
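For reference, the loss computes norm((I - pred @ pinv(pred)) @ predf), so the torch.eye(512) term requires pred and predf to have 512 rows, i.e. shape (512, 314). A minimal dimensional check with random placeholder tensors:

# Dimensional check for the loss term (assumes batch size 512, as printed above)
pred = torch.randn(512, 314, dtype=torch.float64)
predf = torch.randn(512, 314, dtype=torch.float64)
P = pred @ torch.pinverse(pred)                    # (512, 512) projection onto the column space of pred
loss = torch.linalg.norm((torch.eye(512, dtype=torch.float64) - P) @ predf)
print(loss)                                        # a scalar; the matmul fails if pred/predf do not have 512 rows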


## RNN code



class observe(nn.Module):

    def __init__(self, trunc, N):
        super(observe, self).__init__()        
        self.rnn = nn.RNN(input_size=N, hidden_size=512-64, num_layers=2, batch_first=True)
        self.linear = nn.Linear(512-64, trunc)
        self.activation = nn.ELU()  
        # self.linear2 = nn.Linear(500, trunc)


    def forward(self, y_seq):
        output, _ = self.rnn(y_seq)
        output = self.linear(self.activation(output[:, -1, :]))
        return torch.cat((y_seq[:, -1, :], output), dim=-1)
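To illustrate the size problem, here is what this forward pass returns for two input layouts (a sketch assuming dh=64 and trunc=250 as in the loop below). With batch_first=True the first dimension is treated as the batch, and it is the dimension that survives output[:, -1, :]:

func = observe(250, 64).double()

x = torch.randn(512, 1, 64, dtype=torch.float64)   # (batch=512, seq_len=1, input=64)
print(func(x).shape)                               # torch.Size([512, 314])

x_t = x.transpose(0, 1)                            # (1, 512, 64): batch=1, seq_len=512
print(func(x_t).shape)                             # torch.Size([1, 314])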


###############################################################################
# Functions
###############################################################################
    
def get_batch(X,X_dt ):
    [M,N]=X.shape
    s = torch.from_numpy(np.random.choice(np.arange(M, dtype=np.int64), args.batch_size, replace=False))

    batch_x = X[s]  # (batch_size, N)
    batch_y = X_dt[s]  # (batch_size, N)

    # Reshape the batches to have shape (batch_size, sequence_length, input_size)

    batch_x = batch_x.unsqueeze(1)
    batch_y = batch_y.unsqueeze(1)


    return batch_x,  batch_y



###########################################################################
# Initialize NN for learning the RHS and set up optimization params
###########################################################################
iters=10000
dh=64
func = observe(250,dh).double()
optimizer = optim.Adam(func.parameters(),lr=1e-5)
#optimizer = optim.Adam(func.parameters(), lr=1e-3,weight_decay=10**-5) #optimizer = optim.RMSprop(func.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=int(iters/3), gamma=0.9999)
end = time.time()

err=[]
test_freq=1
freq=int(iters/10)
ii = 0
###########################################################################
# Optimization iterations
###########################################################################
S=[]
for itr in range(1, iters + 1):

    true_y, true_yf = get_batch(true_y_long, true_yf_long)

    print(true_y.shape)
    print(true_yf.shape)

    avg_loss=0

    optimizer.zero_grad()

    # Reshape the input tensors to have shape (batch_size, sequence_length, input_size)
    true_y = true_y.transpose(0, 1)
    true_yf = true_yf.transpose(0, 1)

    pred=func(true_y)
    predf=func(true_yf)


    loss = torch.linalg.norm((torch.eye(512, requires_grad=True) - pred@torch.pinverse(pred))@predf)
    avg_loss+=loss.item()
    loss.backward()  # Computes the gradient of the loss w.r.t. the network parameters
    optimizer.step()
    scheduler.step()

    # Print out the loss and the time the computation took
    if itr % test_freq == 0:
        with torch.no_grad():
            err.append(avg_loss)
            print('Iter {:04d} | Total Loss {:.10f} | Time {:.6f} | LR {:.6f}'.format(itr, loss.item(),time.time() - end,optimizer.param_groups[0]['lr']))
            ii += 1

end = time.time()
        
        

I expected pred and predf to have size [512, 314], but they come out as torch.Size([1, 314]). I am not sure what is going wrong.
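Tracing the shapes through get_batch and the transpose in the training loop (a sketch assuming batch_size=512 and dh=64) shows where the batch dimension collapses to 1:

batch_x = torch.randn(512, 64, dtype=torch.float64)   # one batch as sampled in get_batch
batch_x = batch_x.unsqueeze(1)                        # (512, 1, 64): batch=512, seq_len=1
true_y = batch_x.transpose(0, 1)                      # (1, 512, 64): batch=1, seq_len=512

func = observe(250, 64).double()                      # the RNN version above
pred = func(true_y)
print(pred.shape)                                     # torch.Size([1, 314])

So the transpose turns the 512 samples into 512 time steps of a single sequence, and output[:, -1, :] keeps only that one sequence, which matches the [1, 314] I am seeing instead of [512, 314].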

deep-learning recurrent-neural-network