I am trying to implement a CNN to classify digits in images. I get this error when I try to train the network:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x8192 and 12800x10)
Here is my CNN's architecture. It has two hidden convolutional layers, each with a different kernel size and number of kernels:
classnet = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=5),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(16, 512, kernel_size=7),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(512 * 5 * 5, 10),
    nn.LogSoftmax(dim=1)
)
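To make the shape mismatch visible, here is a minimal trace sketch (assuming 32x32 RGB inputs and a batch size of 64, which matches the 64x8192 in the error message):

import torch
import torch.nn as nn

# Hypothetical dummy batch: 64 RGB images of size 32x32 (my assumed input shape).
x = torch.randn(64, 3, 32, 32)
for layer in classnet:
    try:
        x = layer(x)
        print(type(layer).__name__, tuple(x.shape))
    except RuntimeError as e:
        print(type(layer).__name__, "failed:", e)
        break
# Expected printout:
# Conv2d    (64, 16, 28, 28)   <- 32 - 5 + 1 = 28
# ReLU      (64, 16, 28, 28)
# MaxPool2d (64, 16, 14, 14)
# Conv2d    (64, 512, 8, 8)    <- 14 - 7 + 1 = 8
# ReLU      (64, 512, 8, 8)
# MaxPool2d (64, 512, 4, 4)
# Flatten   (64, 8192)         <- 512 * 4 * 4 = 8192, but the Linear expects 512 * 5 * 5 = 12800
# Linear    failed: mat1 and mat2 shapes cannot be multiplied ...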
Here is the main class wrapping the CNN:
class ClassifierNeuralNet(nn.Module):
    def __init__(self, classnet):
        super(ClassifierNeuralNet, self).__init__()
        # We provide a sequential module with layers and activations
        self.classnet = classnet
        # The loss function (the negative log-likelihood)
        self.nll = nn.NLLLoss(reduction="none")  # it requires log-softmax as input!!

    # This function classifies an image x to a class.
    # The output must be a class label (long).
    def classify(self, x):
        # use classnet to perform a forward pass on the image
        out = self.classnet(x)
        # argmax picks the class with the maximum (log-)probability
        y_pred = out.argmax(dim=1)
        return y_pred

    # This function is crucial for a module in PyTorch.
    # In our framework, this class outputs a value of the loss function.
    def forward(self, x, y, reduction="avg"):
        # use classnet to perform a forward pass on the image
        out = self.classnet(x)
        print(out.shape)
        # pass the result of the forward pass to the NLL loss function
        loss = self.nll(out, y)
        # return the result based on the reduction parameter
        if reduction == "sum":
            return loss.sum()
        else:
            return loss.mean()
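As a sanity note on the loss: NLLLoss over LogSoftmax outputs should be equivalent to CrossEntropyLoss over raw logits. A minimal sketch (with made-up shapes) of the contract this class relies on:

import torch
import torch.nn as nn

logits = torch.randn(4, 10)                       # hypothetical batch of 4, 10 classes
y = torch.randint(0, 10, (4,))                    # integer class labels
log_probs = nn.LogSoftmax(dim=1)(logits)          # what classnet's last layer produces
nll = nn.NLLLoss(reduction="none")(log_probs, y)
ce = nn.CrossEntropyLoss(reduction="none")(logits, y)
print(torch.allclose(nll, ce))                    # True: NLLLoss expects log-probabilities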
This main class was written for another task (recognizing digits in 8x8 images); now I am trying to apply it to a bigger CNN that recognizes digits in 32x32 images.
Here is my "old" architecture together with the run/evaluation part:
names = ["classifier_mlp", "classifier_cnn"]

# loop over models
for name in names:
    print("\n-> START {}".format(name))

    # Create a folder (REMEMBER: You must mount your drive if you use Colab!)
    if name == "classifier_mlp":
        name = name + "_M_" + str(M)
    elif name == "classifier_cnn":
        name = name + "_M_" + str(M) + "_kernels_" + str(num_kernels)

    # Create a folder if necessary
    result_dir = os.path.join(results_dir, "results", name + "/")
    # =========
    # MAKE SURE THAT "result_dir" IS A PATH TO A LOCAL FOLDER OR A GOOGLE COLAB FOLDER (DEFINED IN CELL 3)
    result_dir = "./"  # (current folder)
    # =========
    if not (os.path.exists(result_dir)):
        os.mkdir(result_dir)

    # MLP
    if name[0:14] == "classifier_mlp":
        classnet = nn.Sequential(
            nn.Linear(D, M),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(M, M),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(M, K),
            nn.LogSoftmax(dim=1))
        # You are asked here to propose your own architecture
        # NOTE: Please remember that the output must be LogSoftmax!
        # ------
        pass

    # CNN
    elif name[0:14] == "classifier_cnn":
        classnet = nn.Sequential(
            Reshape(size=(1, 8, 8)),
            nn.Conv2d(in_channels=1, out_channels=num_kernels, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(in_channels=num_kernels, out_channels=num_kernels * 2, kernel_size=3),
            nn.ReLU(),
            Flatten(),
            nn.Linear(num_kernels * 2 * 4 * 4, M),
            nn.ReLU(),
            nn.Linear(M, K),
            nn.LogSoftmax(dim=1)
        )
        pass

    # Init ClassifierNN
    model = ClassifierNeuralNet(classnet)

    # Init OPTIMIZER (here we use ADAMAX)
    optimizer = torch.optim.Adamax(
        [p for p in model.parameters() if p.requires_grad],
        lr=lr,
        weight_decay=wd,
    )

    # Training procedure
    nll_val, error_val = training(
        name=result_dir + name,
        max_patience=max_patience,
        num_epochs=num_epochs,
        model=model,
        optimizer=optimizer,
        training_loader=training_loader,
        val_loader=val_loader,
    )

    # The final evaluation (on the test set)
    test_loss, test_error = evaluation(name=result_dir + name, test_loader=test_loader)

    # write the results to a file
    f = open(result_dir + name + "_test_loss.txt", "w")
    f.write("NLL: " + str(test_loss) + "\nCE: " + str(test_error))
    f.close()

    # create curves
    plot_curve(
        result_dir + name,
        nll_val,
        file_name="_nll_val_curve.pdf",
        ylabel="nll",
        test_eval=test_loss,
    )
    plot_curve(
        result_dir + name,
        error_val,
        file_name="_ca_val_curve.pdf",
        ylabel="ce",
        color="r-",
        test_eval=test_error,
    )
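For comparison, the flattened size in this old CNN does work out for 8x8 inputs: 8x8 -> conv k=3 -> 6x6 -> conv k=3 -> 4x4, hence num_kernels * 2 * 4 * 4. A quick check, using the Reshape/Flatten helpers from the full code below and a hypothetical num_kernels = 16 (so D = 64 flattened pixels):

import torch
import torch.nn as nn

num_kernels = 16                        # hypothetical value
x = torch.randn(4, 64)                  # 4 flattened 8x8 grayscale images
features = nn.Sequential(
    Reshape(size=(1, 8, 8)),
    nn.Conv2d(1, num_kernels, kernel_size=3),
    nn.ReLU(),
    nn.Conv2d(num_kernels, num_kernels * 2, kernel_size=3),
    nn.ReLU(),
    Flatten(),
)(x)
print(features.shape)                   # torch.Size([4, 512]) == num_kernels * 2 * 4 * 4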
The complete code is here:
# PLEASE DO NOT REMOVE!
# Here are two auxiliary functions that can be used for a convolutional NN (CNN).

# This module reshapes an input (matrix -> tensor).
class Reshape(nn.Module):
    def __init__(self, size):
        super(Reshape, self).__init__()
        self.size = size  # a list

    def forward(self, x):
        assert x.shape[1] == np.prod(self.size)
        return x.view(x.shape[0], *self.size)

# This module flattens an input (tensor -> matrix) by blending dimensions
# beyond the batch size.
class Flatten(nn.Module):
    def __init__(self):
        super(Flatten, self).__init__()

    def forward(self, x):
        return x.view(x.shape[0], -1)
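# A quick round-trip check of the two helpers above (hypothetical sizes):
# Reshape turns a (4, 64) matrix into a (4, 1, 8, 8) tensor and Flatten undoes it.
_x = torch.randn(4, 64)
assert Flatten()(Reshape(size=(1, 8, 8))(_x)).shape == _x.shape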
# =========
# GRADING:
# 0
# 0.5 pt if code works but it is explained badly
# 1.0 pt if code works and it is explained well
# =========
# Implement a neural network (NN) classifier.
class ClassifierNeuralNet(nn.Module):
    def __init__(self, classnet):
        super(ClassifierNeuralNet, self).__init__()
        # We provide a sequential module with layers and activations
        self.classnet = classnet
        # The loss function (the negative log-likelihood)
        self.nll = nn.NLLLoss(reduction="none")  # it requires log-softmax as input!!

    # This function classifies an image x to a class.
    # The output must be a class label (long).
    def classify(self, x):
        # use classnet to perform a forward pass on the image
        out = self.classnet(x)
        # argmax picks the class with the maximum (log-)probability
        y_pred = out.argmax(dim=1)
        return y_pred

    # This function is crucial for a module in PyTorch.
    # In our framework, this class outputs a value of the loss function.
    def forward(self, x, y, reduction="avg"):
        # use classnet to perform a forward pass on the image
        out = self.classnet(x)
        print(out.shape)
        # pass the result of the forward pass to the NLL loss function
        loss = self.nll(out, y)
        # return the result based on the reduction parameter
        if reduction == "sum":
            return loss.sum()
        else:
            return loss.mean()
# Hyperparameters
# -> data hyperparams
D = 3072  # input dimension (3 * 32 * 32)
# -> model hyperparams
M = 256  # the number of hidden neurons
K = 10  # the number of labels
# -> training hyperparams
lr = 1e-3  # learning rate
wd = 1e-5  # weight decay
num_epochs = 1000  # max. number of epochs
max_patience = 20  # early stopping: stop if validation doesn't improve for more than 20 epochs

name = 'New_CNN' + "_M_" + str(M) + "_kernels_"

# Create a folder if necessary
result_dir = os.path.join(results_dir, "results", name + "/")
# =========
# MAKE SURE THAT "result_dir" IS A PATH TO A LOCAL FOLDER OR A GOOGLE COLAB FOLDER (DEFINED IN CELL 3)
result_dir = "./"  # (current folder)
# =========
if not (os.path.exists(result_dir)):
    os.mkdir(result_dir)
classnet = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=5),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(16, 512, kernel_size=7),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(512 * 5 * 5, 10),
    nn.LogSoftmax(dim=1)
)

# Init ClassifierNN
model = ClassifierNeuralNet(classnet)

# Init OPTIMIZER (here we use ADAMAX)
optimizer = torch.optim.Adamax(
    [p for p in model.parameters() if p.requires_grad],
    lr=lr,
    weight_decay=wd,
)

# Training procedure
nll_val, error_val = training(
    name=result_dir + name,
    max_patience=max_patience,
    num_epochs=num_epochs,
    model=model,
    optimizer=optimizer,
    training_loader=training_loader,
    val_loader=val_loader,
)

# The final evaluation (on the test set)
test_loss, test_error = evaluation(name=result_dir + name, test_loader=test_loader)

# write the results to a file
f = open(result_dir + name + "_test_loss.txt", "w")
f.write("NLL: " + str(test_loss) + "\nCE: " + str(test_error))
f.close()

# create curves
plot_curve(
    result_dir + name,
    nll_val,
    file_name="_nll_val_curve.pdf",
    ylabel="nll",
    test_eval=test_loss,
)
plot_curve(
    result_dir + name,
    error_val,
    file_name="_ca_val_curve.pdf",
    ylabel="ce",
    color="r-",
    test_eval=test_error,
)
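As an aside, one way to avoid hard-coding the flattened size at all might be nn.LazyLinear (available in recent PyTorch versions), which infers in_features on its first forward pass. A sketch, not what I currently run:

classnet = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=5),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(16, 512, kernel_size=7),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.LazyLinear(10),     # in_features is inferred at the first forward pass
    nn.LogSoftmax(dim=1)
)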
Here is the full error message:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-25-4846604cd120> in <cell line: 36>()
34
35 # Training procedure
---> 36 nll_val, error_val = training(
37 name=result_dir + name,
38 max_patience=max_patience,
5 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py in forward(self, input)
112
113 def forward(self, input: Tensor) -> Tensor:
--> 114 return F.linear(input, self.weight, self.bias)
115
116 def extra_repr(self) -> str:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x8192 and 12800x10)
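My reading of the failing call, for completeness: F.linear computes input @ weight.T + bias, so mat1 is the flattened batch and mat2 is the transposed weight of the Linear layer, and their inner dimensions (8192 vs. 12800) must match. A tiny reproduction with hypothetical tensors:

import torch

mat1 = torch.randn(64, 8192)      # the flattened conv output (batch of 64)
mat2 = torch.randn(12800, 10)     # Linear(512 * 5 * 5, 10).weight.T
try:
    mat1 @ mat2                   # inner dimensions differ: 8192 != 12800
except RuntimeError as e:
    print(e)                      # the same shape-mismatch error as above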