I am trying to implement a CNN to classify digits in images. I get this error when I try to train the network:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x8192 and 12800x10)
Here is my CNN's architecture. It has two hidden convolutional layers, each with a different kernel size and number of kernels:
classnet = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=5),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(16, 512, kernel_size=7),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(512 * 5 * 5, 10),
    nn.LogSoftmax(dim=1)
)
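To make the shape mismatch visible, here is a minimal trace sketch (assuming 32x32 RGB inputs and a batch size of 64, which matches the 64x8192 in the error message):

import torch
import torch.nn as nn

# Hypothetical dummy batch: 64 RGB images of size 32x32 (my assumed input shape).
x = torch.randn(64, 3, 32, 32)
for layer in classnet:
    try:
        x = layer(x)
        print(type(layer).__name__, tuple(x.shape))
    except RuntimeError as e:
        print(type(layer).__name__, "failed:", e)
        break
# Expected printout:
# Conv2d    (64, 16, 28, 28)   <- 32 - 5 + 1 = 28
# ReLU      (64, 16, 28, 28)
# MaxPool2d (64, 16, 14, 14)
# Conv2d    (64, 512, 8, 8)    <- 14 - 7 + 1 = 8
# ReLU      (64, 512, 8, 8)
# MaxPool2d (64, 512, 4, 4)
# Flatten   (64, 8192)         <- 512 * 4 * 4 = 8192, but the Linear expects 512 * 5 * 5 = 12800
# Linear    failed: mat1 and mat2 shapes cannot be multiplied ...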
Here is the main class wrapping the CNN:
class ClassifierNeuralNet(nn.Module):
    def __init__(self, classnet):
        super(ClassifierNeuralNet, self).__init__()
        # We provide a sequential module with layers and activations
        self.classnet = classnet
        # The loss function (the negative log-likelihood)
        self.nll = nn.NLLLoss(reduction="none")  # it requires log-softmax as input!!

    # This function classifies an image x to a class.
    # The output must be a class label (long).
    def classify(self, x):
        # use classnet to perform a forward pass on the image
        out = self.classnet(x)
        # argmax picks the class with the maximum (log-)probability
        y_pred = out.argmax(dim=1)
        return y_pred

    # This function is crucial for a module in PyTorch.
    # In our framework, this class outputs a value of the loss function.
    def forward(self, x, y, reduction="avg"):
        # use classnet to perform a forward pass on the image
        out = self.classnet(x)
        print(out.shape)
        # pass the result of the forward pass to the NLL loss function
        loss = self.nll(out, y)
        # return the result based on the reduction parameter
        if reduction == "sum":
            return loss.sum()
        else:
            return loss.mean()
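As a sanity note on the loss: NLLLoss over LogSoftmax outputs should be equivalent to CrossEntropyLoss over raw logits. A minimal sketch (with made-up shapes) of the contract this class relies on:

import torch
import torch.nn as nn

logits = torch.randn(4, 10)                       # hypothetical batch of 4, 10 classes
y = torch.randint(0, 10, (4,))                    # integer class labels
log_probs = nn.LogSoftmax(dim=1)(logits)          # what classnet's last layer produces
nll = nn.NLLLoss(reduction="none")(log_probs, y)
ce = nn.CrossEntropyLoss(reduction="none")(logits, y)
print(torch.allclose(nll, ce))                    # True: NLLLoss expects log-probabilities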
This main class was written for another task (recognizing digits in 8x8 images); now I am trying to apply it to a bigger CNN that recognizes digits in 32x32 images.
Here is my "old" architecture together with the run/evaluation part:
names = ["classifier_mlp", "classifier_cnn"]

# loop over models
for name in names:
    print("\n-> START {}".format(name))

    # Create a folder (REMEMBER: You must mount your drive if you use Colab!)
    if name == "classifier_mlp":
        name = name + "_M_" + str(M)
    elif name == "classifier_cnn":
        name = name + "_M_" + str(M) + "_kernels_" + str(num_kernels)

    # Create a folder if necessary
    result_dir = os.path.join(results_dir, "results", name + "/")
    # =========
    # MAKE SURE THAT "result_dir" IS A PATH TO A LOCAL FOLDER OR A GOOGLE COLAB FOLDER (DEFINED IN CELL 3)
    result_dir = "./"  # (current folder)
    # =========
    if not (os.path.exists(result_dir)):
        os.mkdir(result_dir)

    # MLP
    if name[0:14] == "classifier_mlp":
        classnet = nn.Sequential(
            nn.Linear(D, M),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(M, M),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(M, K),
            nn.LogSoftmax(dim=1))
        # You are asked here to propose your own architecture
        # NOTE: Please remember that the output must be LogSoftmax!
        # ------
        pass

    # CNN
    elif name[0:14] == "classifier_cnn":
        classnet = nn.Sequential(
            Reshape(size=(1, 8, 8)),
            nn.Conv2d(in_channels=1, out_channels=num_kernels, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(in_channels=num_kernels, out_channels=num_kernels * 2, kernel_size=3),
            nn.ReLU(),
            Flatten(),
            nn.Linear(num_kernels * 2 * 4 * 4, M),
            nn.ReLU(),
            nn.Linear(M, K),
            nn.LogSoftmax(dim=1)
        )
        pass

    # Init ClassifierNN
    model = ClassifierNeuralNet(classnet)

    # Init OPTIMIZER (here we use ADAMAX)
    optimizer = torch.optim.Adamax(
        [p for p in model.parameters() if p.requires_grad],
        lr=lr,
        weight_decay=wd,
    )

    # Training procedure
    nll_val, error_val = training(
        name=result_dir + name,
        max_patience=max_patience,
        num_epochs=num_epochs,
        model=model,
        optimizer=optimizer,
        training_loader=training_loader,
        val_loader=val_loader,
    )

    # The final evaluation (on the test set)
    test_loss, test_error = evaluation(name=result_dir + name, test_loader=test_loader)

    # write the results to a file
    f = open(result_dir + name + "_test_loss.txt", "w")
    f.write("NLL: " + str(test_loss) + "\nCE: " + str(test_error))
    f.close()

    # create curves
    plot_curve(
        result_dir + name,
        nll_val,
        file_name="_nll_val_curve.pdf",
        ylabel="nll",
        test_eval=test_loss,
    )
    plot_curve(
        result_dir + name,
        error_val,
        file_name="_ca_val_curve.pdf",
        ylabel="ce",
        color="r-",
        test_eval=test_error,
    )
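For comparison, the flattened size in this old CNN does work out for 8x8 inputs: 8x8 -> conv k=3 -> 6x6 -> conv k=3 -> 4x4, hence num_kernels * 2 * 4 * 4. A quick check, using the Reshape/Flatten helpers from the full code below and a hypothetical num_kernels = 16 (so D = 64 flattened pixels):

import torch
import torch.nn as nn

num_kernels = 16                        # hypothetical value
x = torch.randn(4, 64)                  # 4 flattened 8x8 grayscale images
features = nn.Sequential(
    Reshape(size=(1, 8, 8)),
    nn.Conv2d(1, num_kernels, kernel_size=3),
    nn.ReLU(),
    nn.Conv2d(num_kernels, num_kernels * 2, kernel_size=3),
    nn.ReLU(),
    Flatten(),
)(x)
print(features.shape)                   # torch.Size([4, 512]) == num_kernels * 2 * 4 * 4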
The complete code is here:
# PLEASE DO NOT REMOVE!
# Here are two auxiliary functions that can be used for a convolutional NN (CNN).

# This module reshapes an input (matrix -> tensor).
class Reshape(nn.Module):
    def __init__(self, size):
        super(Reshape, self).__init__()
        self.size = size  # a list

    def forward(self, x):
        assert x.shape[1] == np.prod(self.size)
        return x.view(x.shape[0], *self.size)

# This module flattens an input (tensor -> matrix) by blending dimensions
# beyond the batch size.
class Flatten(nn.Module):
    def __init__(self):
        super(Flatten, self).__init__()

    def forward(self, x):
        return x.view(x.shape[0], -1)
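# A quick round-trip check of the two helpers above (hypothetical sizes):
# Reshape turns a (4, 64) matrix into a (4, 1, 8, 8) tensor and Flatten undoes it.
_x = torch.randn(4, 64)
assert Flatten()(Reshape(size=(1, 8, 8))(_x)).shape == _x.shape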
# =========
# GRADING:
# 0
# 0.5 pt if code works but it is explained badly
# 1.0 pt if code works and it is explained well
# =========
# Implement a neural network (NN) classifier.
class ClassifierNeuralNet(nn.Module):
    def __init__(self, classnet):
        super(ClassifierNeuralNet, self).__init__()
        # We provide a sequential module with layers and activations
        self.classnet = classnet
        # The loss function (the negative log-likelihood)
        self.nll = nn.NLLLoss(reduction="none")  # it requires log-softmax as input!!

    # This function classifies an image x to a class.
    # The output must be a class label (long).
    def classify(self, x):
        # use classnet to perform a forward pass on the image
        out = self.classnet(x)
        # argmax picks the class with the maximum (log-)probability
        y_pred = out.argmax(dim=1)
        return y_pred

    # This function is crucial for a module in PyTorch.
    # In our framework, this class outputs a value of the loss function.
    def forward(self, x, y, reduction="avg"):
        # use classnet to perform a forward pass on the image
        out = self.classnet(x)
        print(out.shape)
        # pass the result of the forward pass to the NLL loss function
        loss = self.nll(out, y)
        # return the result based on the reduction parameter
        if reduction == "sum":
            return loss.sum()
        else:
            return loss.mean()
# Hyperparameters
# -> data hyperparams
D = 3072  # input dimension (3 * 32 * 32)
# -> model hyperparams
M = 256  # the number of hidden neurons
K = 10  # the number of labels
# -> training hyperparams
lr = 1e-3  # learning rate
wd = 1e-5  # weight decay
num_epochs = 1000  # max. number of epochs
max_patience = 20  # early stopping: stop if validation doesn't improve for more than 20 epochs

name = 'New_CNN' + "_M_" + str(M) + "_kernels_"

# Create a folder if necessary
result_dir = os.path.join(results_dir, "results", name + "/")
# =========
# MAKE SURE THAT "result_dir" IS A PATH TO A LOCAL FOLDER OR A GOOGLE COLAB FOLDER (DEFINED IN CELL 3)
result_dir = "./"  # (current folder)
# =========
if not (os.path.exists(result_dir)):
    os.mkdir(result_dir)
classnet = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=5),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(16, 512, kernel_size=7),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(512 * 5 * 5, 10),
    nn.LogSoftmax(dim=1)
)

# Init ClassifierNN
model = ClassifierNeuralNet(classnet)

# Init OPTIMIZER (here we use ADAMAX)
optimizer = torch.optim.Adamax(
    [p for p in model.parameters() if p.requires_grad],
    lr=lr,
    weight_decay=wd,
)

# Training procedure
nll_val, error_val = training(
    name=result_dir + name,
    max_patience=max_patience,
    num_epochs=num_epochs,
    model=model,
    optimizer=optimizer,
    training_loader=training_loader,
    val_loader=val_loader,
)

# The final evaluation (on the test set)
test_loss, test_error = evaluation(name=result_dir + name, test_loader=test_loader)

# write the results to a file
f = open(result_dir + name + "_test_loss.txt", "w")
f.write("NLL: " + str(test_loss) + "\nCE: " + str(test_error))
f.close()

# create curves
plot_curve(
    result_dir + name,
    nll_val,
    file_name="_nll_val_curve.pdf",
    ylabel="nll",
    test_eval=test_loss,
)
plot_curve(
    result_dir + name,
    error_val,
    file_name="_ca_val_curve.pdf",
    ylabel="ce",
    color="r-",
    test_eval=test_error,
)
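As an aside, one way to avoid hard-coding the flattened size at all might be nn.LazyLinear (available in recent PyTorch versions), which infers in_features on its first forward pass. A sketch, not what I currently run:

classnet = nn.Sequential(
    nn.Conv2d(3, 16, kernel_size=5),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(16, 512, kernel_size=7),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.LazyLinear(10),     # in_features is inferred at the first forward pass
    nn.LogSoftmax(dim=1)
)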
Here is the full error message:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-25-4846604cd120> in <cell line: 36>()
34
35 # Training procedure
---> 36 nll_val, error_val = training(
37 name=result_dir + name,
38 max_patience=max_patience,
5 frames
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py in forward(self, input)
112
113 def forward(self, input: Tensor) -> Tensor:
--> 114 return F.linear(input, self.weight, self.bias)
115
116 def extra_repr(self) -> str:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x8192 and 12800x10)
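My reading of the failing call, for completeness: F.linear computes input @ weight.T + bias, so mat1 is the flattened batch and mat2 is the transposed weight of the Linear layer, and their inner dimensions (8192 vs. 12800) must match. A tiny reproduction with hypothetical tensors:

import torch

mat1 = torch.randn(64, 8192)      # the flattened conv output (batch of 64)
mat2 = torch.randn(12800, 10)     # Linear(512 * 5 * 5, 10).weight.T
try:
    mat1 @ mat2                   # inner dimensions differ: 8192 != 12800
except RuntimeError as e:
    print(e)                      # the same shape-mismatch error as above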