I am trying to write code that generalizes the layers of a pretrained model in order to build a meta-model equivalent to twice the original model.
To do this, for the convolutional layers I use the group-size concept to produce two outputs, and for now I simply want to duplicate the remaining layers. That way, the first half of a convolution's output feeds one copy of the following layer (e.g. relu1_1) and the second half feeds the other copy (relu1_2).
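For reference, this is the grouped-convolution behaviour the duplication scheme relies on; a minimal sketch with purely illustrative channel counts:

import torch
import torch.nn as nn

# With groups=2, the first half of the input channels is convolved only with
# the first half of the filters, and the second half only with the second half.
conv = nn.Conv2d(in_channels=6, out_channels=8, kernel_size=3, padding=1, groups=2, bias=False)

x1 = torch.randn(1, 3, 32, 32)
x2 = torch.randn(1, 3, 32, 32)
y = conv(torch.cat((x1, x2), dim=1))

# The first 4 output channels depend only on x1 and the last 4 only on x2,
# so each "branch" of the doubled model stays independent.
print(y[:, :4].shape, y[:, 4:].shape)  # torch.Size([1, 4, 32, 32]) twice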
The main problem I have found is that many pretrained networks are not defined using only plain layers; their topology also contains groupings of layers (nested modules).
In the following code:
for name, layer in model.named_children():
    if (isinstance(layer, nn.Sequential) or list(layer.children())):
        for subname, sublayer in layer.named_children():
I use this as a condition to try to distinguish plain layers from layer groupings, but even so, in most models the condition ends up treating layer groupings as if they were plain layers.
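For illustration, a quick check on torchvision's resnet18 shows why: named_children() only returns direct children, so a BasicBlock inside layer1 is never opened by the single nested loop:

from torchvision import models

m = models.resnet18(weights=None)

# named_children() is only one level deep: each entry is either a leaf layer
# or a whole container, and the condition above descends only one level.
for name, layer in m.named_children():
    kind = "grouping" if list(layer.children()) else "plain layer"
    print(name, type(layer).__name__, kind)

# layer1 is a grouping, but its children are BasicBlocks, which are themselves
# groupings that the single nested loop copies as opaque blocks.
for subname, sublayer in m.layer1.named_children():
    print("layer1 ->", subname, type(sublayer).__name__, bool(list(sublayer.children())))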
Here is the complete code:
import torch
import torch.nn as nn
import copy
from torchvision import models
from collections import OrderedDict

# Custom class for performing double convolutions with group_size
class ConvolutionalBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=2, filter_values=None, bias=True):
        super(ConvolutionalBlock, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, groups=groups, bias=bias)
        if filter_values is not None:
            self.initialize_manual_weights(filter_values)
        else:
            self.initialize_integer_weights()
        # Set the weight and bias to float16 after initialization
        self.conv.weight.data = self.conv.weight.data.to(torch.float16)
        if self.conv.bias is not None:
            self.conv.bias.data = self.conv.bias.data.to(torch.float16)

    def initialize_integer_weights(self):
        int_weights = torch.randint(-5, 5, self.conv.weight.shape, dtype=torch.int16, device=self.conv.weight.device)
        self.conv.weight.data = int_weights.to(dtype=torch.float16)  # Ensure weights are float16
        if self.conv.bias is not None:
            int_bias = torch.zeros(self.conv.bias.shape, dtype=torch.int16, device=self.conv.bias.device)
            self.conv.bias.data = int_bias.to(dtype=torch.float16)  # Ensure bias is also float16

    def initialize_manual_weights(self, filter_values):
        filter_tensor = torch.tensor(filter_values, dtype=torch.float16, device=self.conv.weight.device).view(self.conv.weight.shape)
        self.conv.weight.data = filter_tensor  # Weights are already float16
        if self.conv.bias is not None:
            self.conv.bias.data.fill_(0.0)
            self.conv.bias.data = self.conv.bias.data.to(dtype=torch.float16)  # Ensure bias is float16

    def forward(self, x):
        return self.conv(x)

# Function to copy and modify the model
def copy_and_modify_cnn_model(model):
    modified_layers = OrderedDict()
    for name, layer in model.named_children():
        if (isinstance(layer, nn.Sequential) or list(layer.children())):
            for subname, sublayer in layer.named_children():
                if isinstance(sublayer, nn.Conv2d):
                    in_channels = sublayer.in_channels
                    out_channels = sublayer.out_channels
                    kernel_size = sublayer.kernel_size
                    stride = sublayer.stride
                    padding = sublayer.padding
                    modified_layers[subname] = ConvolutionalBlock(
                        in_channels=in_channels * 2,
                        out_channels=out_channels,
                        kernel_size=kernel_size,
                        stride=stride,
                        padding=padding,
                        groups=2
                    )
                else:
                    modified_layers[subname + "_1"] = copy.deepcopy(sublayer)
                    modified_layers[subname + "_2"] = copy.deepcopy(sublayer)
        else:
            if isinstance(layer, nn.Conv2d):
                in_channels = layer.in_channels
                out_channels = layer.out_channels
                kernel_size = layer.kernel_size
                stride = layer.stride
                padding = layer.padding
                modified_layers[name] = ConvolutionalBlock(
                    in_channels=in_channels * 2,
                    out_channels=out_channels,
                    kernel_size=kernel_size,
                    stride=stride,
                    padding=padding,
                    groups=2
                )
            else:
                modified_layers[name + "_1"] = copy.deepcopy(layer)
                modified_layers[name + "_2"] = copy.deepcopy(layer)
    new_model = nn.Sequential(modified_layers)
    return new_model

# Load a pre-trained model
original_model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)

# Set the random seed for reproducibility
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Move the model to the GPU if available
device = 'cuda' if torch.cuda.is_available() else 'cpu'
original_model.to(device)

# Create the modified model
new_model = copy_and_modify_cnn_model(original_model).to(device).half()  # Convert the model to float16

# Check the new model
print("original model")
print(original_model)
print("new model")
print(new_model)

# Create a synthetic input tensor
input_tensor_1 = torch.randn(1, 3, 224, 224, device=device)  # Original input tensor with 3 channels
synthetic_input_tensor = torch.cat((input_tensor_1, input_tensor_1), dim=1).to(torch.float16)

# Test the output of the modified model
output_modified = new_model(synthetic_input_tensor)
print("\nOutput of the modified model:")
print(output_modified)
Finally, here is what happens when I run the code on models such as resnet18 or vgg16. You can see that, when copying the model, there are layers it does not detect, or layer groupings it does not recognize as such and instead copies as loose layers.
VGG16 original layers:
VGG(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace=True)
(2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace=True)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(6): ReLU(inplace=True)
(7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): ReLU(inplace=True)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace=True)
(12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): ReLU(inplace=True)
(14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): ReLU(inplace=True)
(16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): ReLU(inplace=True)
(19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(20): ReLU(inplace=True)
(21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(22): ReLU(inplace=True)
(23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(25): ReLU(inplace=True)
(26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(27): ReLU(inplace=True)
(28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(29): ReLU(inplace=True)
(30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
(classifier): Sequential(
(0): Linear(in_features=25088, out_features=4096, bias=True)
(1): ReLU(inplace=True)
(2): Dropout(p=0.5, inplace=False)
(3): Linear(in_features=4096, out_features=4096, bias=True)
(4): ReLU(inplace=True)
(5): Dropout(p=0.5, inplace=False)
(6): Linear(in_features=4096, out_features=1000, bias=True)
)
)
VGG16 duplicated:
Sequential(
(0): ConvolutionalBlock(
(conv): Conv2d(6, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
)
(1_1): ReLU(inplace=True)
(1_2): ReLU(inplace=True)
(2): ConvolutionalBlock(
(conv): Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
)
(3_1): Linear(in_features=4096, out_features=4096, bias=True)
(3_2): Linear(in_features=4096, out_features=4096, bias=True)
(4_1): ReLU(inplace=True)
(4_2): ReLU(inplace=True)
(5): ConvolutionalBlock(
(conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
)
(6_1): Linear(in_features=4096, out_features=1000, bias=True)
(6_2): Linear(in_features=4096, out_features=1000, bias=True)
(7): ConvolutionalBlock(
(conv): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
)
(8_1): ReLU(inplace=True)
(8_2): ReLU(inplace=True)
(9_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(9_2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(10): ConvolutionalBlock(
(conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
)
(11_1): ReLU(inplace=True)
(11_2): ReLU(inplace=True)
(12): ConvolutionalBlock(
(conv): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
)
(13_1): ReLU(inplace=True)
(13_2): ReLU(inplace=True)
(14): ConvolutionalBlock(
(conv): Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
)
(15_1): ReLU(inplace=True)
(15_2): ReLU(inplace=True)
(16_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(16_2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(17): ConvolutionalBlock(
(conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
)
(18_1): ReLU(inplace=True)
(18_2): ReLU(inplace=True)
(19): ConvolutionalBlock(
(conv): Conv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
)
(20_1): ReLU(inplace=True)
(20_2): ReLU(inplace=True)
(21): ConvolutionalBlock(
(conv): Conv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
)
(22_1): ReLU(inplace=True)
(22_2): ReLU(inplace=True)
(23_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(23_2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(24): ConvolutionalBlock(
(conv): Conv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
)
(25_1): ReLU(inplace=True)
(25_2): ReLU(inplace=True)
(26): ConvolutionalBlock(
(conv): Conv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
)
(27_1): ReLU(inplace=True)
(27_2): ReLU(inplace=True)
(28): ConvolutionalBlock(
(conv): Conv2d(1024, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=2)
)
(29_1): ReLU(inplace=True)
(29_2): ReLU(inplace=True)
(30_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(30_2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(avgpool_1): AdaptiveAvgPool2d(output_size=(7, 7))
(avgpool_2): AdaptiveAvgPool2d(output_size=(7, 7))
(0_1): Linear(in_features=25088, out_features=4096, bias=True)
(0_2): Linear(in_features=25088, out_features=4096, bias=True)
(2_1): Dropout(p=0.5, inplace=False)
(2_2): Dropout(p=0.5, inplace=False)
(5_1): Dropout(p=0.5, inplace=False)
(5_2): Dropout(p=0.5, inplace=False)
)
In this case, layers 1, 3, 4 and 6 of the classifier are not detected.
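As far as I can tell, part of the reason is a key collision: features and classifier both use numeric child names, so later entries silently overwrite earlier ones in the flat OrderedDict. A tiny sketch of the effect (my own reading of the output above, not verified beyond this):

from collections import OrderedDict

modified_layers = OrderedDict()
modified_layers["3_1"] = "ReLU copied from features"      # written while walking features
modified_layers["3_1"] = "Linear copied from classifier"  # same key while walking classifier
print(modified_layers)  # only the classifier entry survives; the features ReLU is gone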
ResNet18 original layers:
ResNet(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1): BasicBlock(
(conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(layer2): Sequential(
(0): BasicBlock(
(conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
...
(layer4): Sequential(
(0): BasicBlock(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
(fc): Linear(in_features=512, out_features=1000, bias=True)
)
ResNet18 duplicated:
Sequential(
(conv1): ConvolutionalBlock(
(conv): Conv2d(6, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), groups=2)
)
(bn1_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(bn1_2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu_1): ReLU(inplace=True)
(relu_2): ReLU(inplace=True)
(maxpool_1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(maxpool_2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(0_1): BasicBlock(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(0_2): BasicBlock(
(conv1): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1_1): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(1_2): BasicBlock(
(conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(avgpool_1): AdaptiveAvgPool2d(output_size=(1, 1))
(avgpool_2): AdaptiveAvgPool2d(output_size=(1, 1))
(fc_1): Linear(in_features=512, out_features=1000, bias=True)
(fc_2): Linear(in_features=512, out_features=1000, bias=True)
)
This case is more complicated: layers are lost as well, and other layers are copied together as whole blocks instead of as individual layers.
My main question is: how can I replicate the topology of any pretrained model layer by layer, regardless of how it is defined?
You can simply dig into the source code of the target pretrained model and manipulate the target layers directly in any way you want. They are no different from any other variable in a Python class.
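For instance, with a torchvision resnet18 (a minimal sketch, just to show that submodules are plain attributes you can read and reassign; the replacement layers here are arbitrary):

import torch.nn as nn
from torchvision import models

model = models.resnet18(weights=None)

# Submodules are ordinary attributes, so they can be read and reassigned directly.
print(model.layer1[0].conv1)                    # Conv2d(64, 64, kernel_size=(3, 3), ...)
model.layer1[0].conv1 = nn.Conv2d(128, 64, kernel_size=3, padding=1, groups=2, bias=False)
model.fc = nn.Linear(model.fc.in_features, 10)  # e.g. swap the classification head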
By the way, the module names printed in the terminal are not necessarily their actual names in the Python code; they are synthesized according to simple rules. For example, a name such as conv.1 usually means the source code looks like self.conv = nn.ModuleList([nn.Conv2d(...) for _ in range(n)]), and it refers to the second convolutional layer in that list.
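Building on that, here is a rough sketch of how one could walk any model and swap every Conv2d regardless of nesting, using the dotted names from named_modules() together with getattr/setattr. The replace_convs helper and the factory lambda are my own illustration, not part of torchvision or the question's code:

import torch.nn as nn
from torchvision import models

def replace_convs(model, factory):
    # named_modules() yields every submodule with its full dotted path,
    # e.g. "features.0" or "layer1.0.conv1", however deeply it is nested.
    for full_name, module in list(model.named_modules()):
        if isinstance(module, nn.Conv2d):
            *parents, attr = full_name.split(".")
            parent = model
            for p in parents:
                parent = getattr(parent, p)  # also resolves Sequential/ModuleList indices
            setattr(parent, attr, factory(module))
    return model

model = models.vgg16(weights=None)
replace_convs(model, lambda c: nn.Conv2d(c.in_channels * 2, c.out_channels,
                                         c.kernel_size, c.stride, c.padding, groups=2))
print(model.features[0])  # Conv2d(6, 64, ..., groups=2)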