当我尝试在 YOLO 上使用 Self-Attention 时,出现 NotImplementedError。
类 BertSelfAttention(nn.Module): def init(自身,配置): 超级()。init() assert config["hidden_size"] % config["num_of_attention_heads"] == 0, "隐藏大小不是注意力头数量的倍数"
self.num_attention_heads = config['num_of_attention_heads']
self.attention_head_size = int(config['hidden_size'] / config['num_of_attention_heads'])
self.all_head_size = self.num_attention_heads * self.attention_head_size
self.query = nn.ModuleList([nn.Linear(config['hidden_size'], self.all_head_size)])
self.key = nn.ModuleList([nn.Linear(config['hidden_size'], self.all_head_size)])
self.value = nn.ModuleList([nn.Linear(config['hidden_size'], self.all_head_size)])
self.dense = nn.ModuleList([nn.Linear(config['hidden_size'], config['hidden_size'])])
def transpose_for_scores(self, x):
new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size)
x = x.view(*new_x_shape)
return x.permute(0, 2, 1, 3)
def forward(self, hidden_states):
# mixed_query_layer = self.query(hidden_states) # [Batch_size x Seq_length x Hidden_size]
# mixed_key_layer = self.key(hidden_states) # [Batch_size x Seq_length x Hidden_size]
# mixed_value_layer = self.value(hidden_states) # [Batch_size x Seq_length x Hidden_size]
size = hidden_states.size(dim=1)
query_layer = self.transpose_for_scores(self.query(torch.randn(16,size))) # [Batch_size x Num_of_heads x Seq_length x Head_size]
key_layer = self.transpose_for_scores(self.key(torch.randn(16,size))) # [Batch_size x Num_of_heads x Seq_length x Head_size]
value_layer = self.transpose_for_scores(self.value(torch.randn(16,size))) # [Batch_size x Num_of_heads x Seq_length x Head_size]
attention_scores = torch.matmul(query_layer, key_layer.transpose(-1,-2)) # [Batch_size x Num_of_heads x Seq_length x Seq_length]
attention_scores = attention_scores / math.sqrt(self.attention_head_size) # [Batch_size x Num_of_heads x Seq_length x Seq_length]
attention_probs = nn.Softmax(dim=-1)(attention_scores) # [Batch_size x Num_of_heads x Seq_length x Seq_length]
context_layer = torch.matmul(attention_probs, value_layer) # [Batch_size x Num_of_heads x Seq_length x Head_size]
context_layer = context_layer.permute(0, 2, 1, 3).contiguous() # [Batch_size x Seq_length x Num_of_heads x Head_size]
new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) # [Batch_size x Seq_length x Hidden_size]
context_layer = context_layer.view(*new_context_layer_shape) # [Batch_size x Seq_length x Hidden_size]
output = self.dense(context_layer)
return output
这就是结果:(
训练开始...
Epoch lr iou_loss dfl_loss cls_loss
0%| | 0/38 [00:11 训练步骤错误。 训练循环或评估/保存模型中出现错误。 回溯(最近一次调用最后一次): 文件“/content/drive/MyDrive/KULIAH/SKRIPSI/YOLOv6/tools/train.py”,第 143 行,位于 主要(参数) 文件“/content/drive/MyDrive/KULIAH/SKRIPSI/YOLOv6/tools/train.py”,第 133 行,主目录 训练师.train() 文件“/content/drive/MyDrive/KULIAH/SKRIPSI/YOLOv6/yolov6/core/engine.py”,第 121 行,列车中 self.train_one_epoch(self.epoch) 文件“/content/drive/MyDrive/KULIAH/SKRIPSI/YOLOv6/yolov6/core/engine.py”,第 135 行,train_one_epoch self.train_in_steps(epoch_num, self.step) 文件“/content/drive/MyDrive/KULIAH/SKRIPSI/YOLOv6/yolov6/core/engine.py”,第 152 行,train_in_steps 中 preds, s_featmaps = self.model(图像) 文件“/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py”,第 1518 行,在 _wrapped_call_impl 中 返回 self._call_impl(*args, **kwargs) 文件“/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py”,第 1527 行,在 _call_impl 中 返回forward_call(*args, **kwargs) 文件“/content/drive/MyDrive/KULIAH/SKRIPSI/YOLOv6/yolov6/models/yolo.py”,第 36 行,向前 x = self.neck(x) 文件“/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py”,第 1518 行,在 _wrapped_call_impl 中 返回 self._call_impl(*args, **kwargs) 文件“/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py”,第 1527 行,在 _call_impl 中 返回forward_call(*args, **kwargs) 文件“/content/drive/MyDrive/KULIAH/SKRIPSI/YOLOv6/yolov6/models/reppan.py”,第 362 行,前进 fpn_out0 = self.selfattention(x0) 文件“/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py”,第 1518 行,在 _wrapped_call_impl 中 返回 self._call_impl(*args, **kwargs) 文件“/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py”,第 1527 行,在 _call_impl 中 返回forward_call(*args, **kwargs) 文件“/content/drive/MyDrive/KULIAH/SKRIPSI/YOLOv6/yolov6/layers/common.py”,第 50 行,向前 mix_query_layer = self.query(hidden_states) # [Batch_size x Seq_length x Hidden_size] 文件“/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py”,第 1518 行,在 _wrapped_call_impl 中 返回 self._call_impl(*args, **kwargs) 文件“/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py”,第 1527 行,在 _call_impl 中 返回forward_call(*args, **kwargs) 文件“/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py”,第 372 行,在 _forward_unimplemented 中 raise NotImplementedError(f"模块 [{type(self).name}] 缺少所需的“转发”功能”) NotImplementedError:模块 [ModuleList] 缺少所需的“转发”功能
A
ModuleList
只是一个跟踪 pytorch 对象/参数的列表。您无法调用它,因为它没有转发方法。我不知道为什么你要把一个 pytorch 模块放在 ModuleList
中 - 你可以只拥有自己的模块。
如果您正在寻找多头注意力的纯 pytorch 实现,您可以检查此实现