我正在用 PyTorch 实现 YOLOv3。架构图:https://i.sstatic.net/mncjfiDs.png
代码:
class Convolutional(nn.Module):  # DBL
    """Conv2d -> BatchNorm2d -> LeakyReLU(0.1) unit (the "DBL" block).

    The convolution is created with ``bias=False``; it is followed directly
    by a batch-normalisation layer.

    Note:
        ``padding`` defaults to 1. At stride 1 that keeps the spatial size
        unchanged only for 3x3 kernels; with ``kernel_size=1`` the output
        grows by 2 pixels per side pair, so pass ``padding=0`` explicitly
        for 1x1 convolutions.
    """

    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, padding=1):
        super().__init__()
        conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
        )
        norm = nn.BatchNorm2d(out_channels)
        activation = nn.LeakyReLU(0.1)
        self._stack = nn.Sequential(conv, norm, activation)

    def forward(self, x):
        """Run ``x`` through convolution, batch norm and activation."""
        result = self._stack(x)
        return result
class Detection(nn.Module):
    """Detection head: 1x1 DBL -> 3x3 DBL -> plain 1x1 Conv2d.

    Args:
        in_channels: number of channels of the incoming feature map.
        C: added once to the output channel count (presumably the number
            of classes -- confirm against the loss function).
        B: each unit contributes 5 output channels (presumably the number
            of boxes/anchors per cell -- confirm against the loss function).

    The output has ``B * 5 + C`` channels and the same spatial size as the
    input.
    """

    def __init__(self, in_channels: int, C, B):
        super().__init__()
        out_channels = in_channels // 2
        self.stack = nn.Sequential(
            Convolutional(in_channels=in_channels, out_channels=out_channels, kernel_size=1, padding=0),
            # padding=1 is spelled out so the 3x3 conv visibly preserves size.
            Convolutional(in_channels=out_channels, out_channels=in_channels, kernel_size=3, padding=1),
            # Final projection is a bare convolution: no batch norm, no activation.
            nn.Conv2d(in_channels=in_channels, out_channels=((B * 5) + C), kernel_size=1, padding=0),
        )

    def forward(self, x):
        """Return the raw prediction map for feature map ``x``."""
        # The former debug ``print(out.shape)`` was removed: it wrote to
        # stdout on every forward pass.
        return self.stack(x)
class FPN(nn.Module):
    """Upsample a coarse feature map and fuse it with a skip connection.

    The input is reduced to ``out_channels`` by a 1x1 DBL unit, upsampled 2x
    with a transposed convolution, concatenated with ``skip`` along the
    channel axis and fused by a 3x3 DBL unit. ``skip`` must therefore carry
    ``out_channels`` channels, since the fusing convolution expects
    ``2 * out_channels`` inputs.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv1 = Convolutional(in_channels, out_channels, kernel_size=1, padding=0)
        self.up = nn.ConvTranspose2d(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=2,
            stride=2,
        )
        self.conv3 = Convolutional(out_channels * 2, out_channels, kernel_size=3)

    def forward(self, x, skip):
        """Return the fused feature map of ``x`` (coarse) and ``skip``."""
        upsampled = self.up(self.conv1(x))
        merged = torch.cat([upsampled, skip], dim=1)
        return self.conv3(merged)
class DBLx5(nn.Module):
    """Five stacked DBL units alternating 1x1 and 3x3 kernels.

    Channel flow: ``in -> in/2 -> in -> in/2 -> in -> in``. The spatial size
    of the input is preserved.

    Args:
        in_channels: number of channels of the incoming (and outgoing)
            feature map.
    """

    def __init__(self, in_channels):
        super().__init__()
        out_channels = in_channels // 2
        # Padding is given explicitly for every layer. 1x1 kernels need
        # padding=0: with padding=1 each of them enlarges the feature map by
        # 2 pixels per dimension, so the output would no longer match the
        # maps it is later concatenated with.
        self.stack = nn.Sequential(
            Convolutional(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0),
            Convolutional(in_channels=out_channels, out_channels=in_channels, kernel_size=3, stride=1, padding=1),
            Convolutional(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0),
            Convolutional(in_channels=out_channels, out_channels=in_channels, kernel_size=3, stride=1, padding=1),
            Convolutional(in_channels=in_channels, out_channels=in_channels, kernel_size=1, stride=1, padding=0),
        )

    def forward(self, x):
        """Return the result of the five-layer stack applied to ``x``."""
        # Return the stack's output; returning ``x`` would discard all five
        # layers and waste their computation and memory.
        return self.stack(x)
class Residual(nn.Module):  # ResUnit
    """Residual unit: input plus the output of a 1x1 -> 3x3 DBL pair.

    The 1x1 unit halves the channel count and the 3x3 unit restores it, so
    the result has the same shape as the input.
    """

    def __init__(self, in_channels: int):
        super().__init__()
        hidden_channels = in_channels // 2
        squeeze = Convolutional(in_channels, hidden_channels, kernel_size=1, padding=0)
        expand = Convolutional(hidden_channels, in_channels, kernel_size=3, padding=1)
        self._conv_stack = nn.Sequential(squeeze, expand)

    def forward(self, x):
        """Return ``x`` plus the transformed ``x``."""
        branch = self._conv_stack(x)
        return x + branch
class Darknet(nn.Module):
    """Backbone made of three consecutive stages of DBL and residual units.

    Each stage's output is returned so that later layers can use feature
    maps at three resolutions. Counting the stride-2 convolutions, the
    stages downsample the input by 8, 16 and 32 respectively and produce
    256, 512 and 1024 channels.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: stem, then 1 + 2 + 8 residual units with three downsamplings.
        stage1 = [
            Convolutional(in_channels=3, out_channels=32, kernel_size=3),
            Convolutional(in_channels=32, out_channels=64, kernel_size=3, stride=2),
            Residual(64),
            Convolutional(in_channels=64, out_channels=128, kernel_size=3, stride=2),
        ]
        stage1.extend(Residual(128) for _ in range(2))
        stage1.append(Convolutional(in_channels=128, out_channels=256, kernel_size=3, stride=2))
        stage1.extend(Residual(256) for _ in range(8))

        # Stage 2: one downsampling followed by 8 residual units.
        stage2 = [Convolutional(in_channels=256, out_channels=512, kernel_size=3, stride=2)]
        stage2.extend(Residual(512) for _ in range(8))

        # Stage 3: one downsampling followed by 4 residual units.
        stage3 = [Convolutional(in_channels=512, out_channels=1024, kernel_size=3, stride=2)]
        stage3.extend(Residual(1024) for _ in range(4))

        self.stack_list1 = nn.Sequential(*stage1)
        self.stack_list2 = nn.Sequential(*stage2)
        self.stack_list3 = nn.Sequential(*stage3)

    def forward(self, x):
        """Return the outputs of stage 1, stage 2 and stage 3, in that order."""
        features = []
        for stage in (self.stack_list1, self.stack_list2, self.stack_list3):
            x = stage(x)
            features.append(x)
        return tuple(features)
class YOLOv3(nn.Module):
    """Darknet backbone with a two-step top-down path and three detection heads.

    Args:
        C: forwarded to every ``Detection`` head.
        B: forwarded to every ``Detection`` head (default 2).

    ``forward`` returns three prediction maps, ordered from the coarsest
    backbone level to the finest.
    """

    def __init__(self, C, B=2):
        super().__init__()
        self.darknet = Darknet()
        self.dbl5_1 = DBLx5(1024)
        self.dbl5_2 = DBLx5(512)
        self.detection_1 = Detection(1024, C, B)
        self.detection_2 = Detection(512, C, B)
        self.detection_3 = Detection(256, C, B)
        self.fpn_1 = FPN(1024, 512)
        self.fpn_2 = FPN(512, 256)

    def forward(self, x):
        """Compute the three prediction maps for an image batch ``x``."""
        # Backbone features carry 256, 512 and 1024 channels respectively.
        shallow, middle, deep = self.darknet(x)

        # Top-down path: refine the deepest map, then merge it step by step
        # with the shallower backbone features.
        coarse = self.dbl5_1(deep)
        medium = self.fpn_1(coarse, middle)
        medium = self.dbl5_2(medium)
        fine = self.fpn_2(medium, shallow)

        # One detection head per scale.
        pred_coarse = self.detection_1(coarse)
        pred_medium = self.detection_2(medium)
        pred_fine = self.detection_3(fine)
        return pred_coarse, pred_medium, pred_fine
torchsummary 报告的模型大小大得不合常理,而且在训练中做前向预测时会报 CUDA 内存不足错误。我怀疑问题出在 Darknet-53 上:单独统计时它的大小是正常的,但把它放进 YOLO 之后,就会出现下面这个巨大的数字。(输入图像尺寸为 3x416x416)摘要输出:
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [-1, 32, 416, 416] 864
BatchNorm2d-2 [-1, 32, 416, 416] 64
LeakyReLU-3 [-1, 32, 416, 416] 0
Convolutional-4 [-1, 32, 416, 416] 0
Conv2d-5 [-1, 64, 208, 208] 18,432
BatchNorm2d-6 [-1, 64, 208, 208] 128
LeakyReLU-7 [-1, 64, 208, 208] 0
Convolutional-8 [-1, 64, 208, 208] 0
Conv2d-9 [-1, 32, 208, 208] 2,048
BatchNorm2d-10 [-1, 32, 208, 208] 64
LeakyReLU-11 [-1, 32, 208, 208] 0
Convolutional-12 [-1, 32, 208, 208] 0
Conv2d-13 [-1, 64, 208, 208] 18,432
BatchNorm2d-14 [-1, 64, 208, 208] 128
LeakyReLU-15 [-1, 64, 208, 208] 0
Convolutional-16 [-1, 64, 208, 208] 0
Residual-17 [-1, 64, 208, 208] 0
Conv2d-18 [-1, 128, 104, 104] 73,728
BatchNorm2d-19 [-1, 128, 104, 104] 256
LeakyReLU-20 [-1, 128, 104, 104] 0
Convolutional-21 [-1, 128, 104, 104] 0
Conv2d-22 [-1, 64, 104, 104] 8,192
BatchNorm2d-23 [-1, 64, 104, 104] 128
LeakyReLU-24 [-1, 64, 104, 104] 0
Convolutional-25 [-1, 64, 104, 104] 0
Conv2d-26 [-1, 128, 104, 104] 73,728
BatchNorm2d-27 [-1, 128, 104, 104] 256
LeakyReLU-28 [-1, 128, 104, 104] 0
Convolutional-29 [-1, 128, 104, 104] 0
Residual-30 [-1, 128, 104, 104] 0
Conv2d-31 [-1, 64, 104, 104] 8,192
BatchNorm2d-32 [-1, 64, 104, 104] 128
LeakyReLU-33 [-1, 64, 104, 104] 0
Convolutional-34 [-1, 64, 104, 104] 0
Conv2d-35 [-1, 128, 104, 104] 73,728
BatchNorm2d-36 [-1, 128, 104, 104] 256
LeakyReLU-37 [-1, 128, 104, 104] 0
Convolutional-38 [-1, 128, 104, 104] 0
Residual-39 [-1, 128, 104, 104] 0
Conv2d-40 [-1, 256, 52, 52] 294,912
BatchNorm2d-41 [-1, 256, 52, 52] 512
LeakyReLU-42 [-1, 256, 52, 52] 0
Convolutional-43 [-1, 256, 52, 52] 0
Conv2d-44 [-1, 128, 52, 52] 32,768
BatchNorm2d-45 [-1, 128, 52, 52] 256
LeakyReLU-46 [-1, 128, 52, 52] 0
Convolutional-47 [-1, 128, 52, 52] 0
Conv2d-48 [-1, 256, 52, 52] 294,912
BatchNorm2d-49 [-1, 256, 52, 52] 512
LeakyReLU-50 [-1, 256, 52, 52] 0
Convolutional-51 [-1, 256, 52, 52] 0
Residual-52 [-1, 256, 52, 52] 0
Conv2d-53 [-1, 128, 52, 52] 32,768
BatchNorm2d-54 [-1, 128, 52, 52] 256
LeakyReLU-55 [-1, 128, 52, 52] 0
Convolutional-56 [-1, 128, 52, 52] 0
Conv2d-57 [-1, 256, 52, 52] 294,912
BatchNorm2d-58 [-1, 256, 52, 52] 512
LeakyReLU-59 [-1, 256, 52, 52] 0
Convolutional-60 [-1, 256, 52, 52] 0
Residual-61 [-1, 256, 52, 52] 0
Conv2d-62 [-1, 128, 52, 52] 32,768
BatchNorm2d-63 [-1, 128, 52, 52] 256
LeakyReLU-64 [-1, 128, 52, 52] 0
Convolutional-65 [-1, 128, 52, 52] 0
Conv2d-66 [-1, 256, 52, 52] 294,912
BatchNorm2d-67 [-1, 256, 52, 52] 512
LeakyReLU-68 [-1, 256, 52, 52] 0
Convolutional-69 [-1, 256, 52, 52] 0
Residual-70 [-1, 256, 52, 52] 0
Conv2d-71 [-1, 128, 52, 52] 32,768
BatchNorm2d-72 [-1, 128, 52, 52] 256
LeakyReLU-73 [-1, 128, 52, 52] 0
Convolutional-74 [-1, 128, 52, 52] 0
Conv2d-75 [-1, 256, 52, 52] 294,912
BatchNorm2d-76 [-1, 256, 52, 52] 512
LeakyReLU-77 [-1, 256, 52, 52] 0
Convolutional-78 [-1, 256, 52, 52] 0
Residual-79 [-1, 256, 52, 52] 0
Conv2d-80 [-1, 128, 52, 52] 32,768
BatchNorm2d-81 [-1, 128, 52, 52] 256
LeakyReLU-82 [-1, 128, 52, 52] 0
Convolutional-83 [-1, 128, 52, 52] 0
Conv2d-84 [-1, 256, 52, 52] 294,912
BatchNorm2d-85 [-1, 256, 52, 52] 512
LeakyReLU-86 [-1, 256, 52, 52] 0
Convolutional-87 [-1, 256, 52, 52] 0
Residual-88 [-1, 256, 52, 52] 0
Conv2d-89 [-1, 128, 52, 52] 32,768
BatchNorm2d-90 [-1, 128, 52, 52] 256
LeakyReLU-91 [-1, 128, 52, 52] 0
Convolutional-92 [-1, 128, 52, 52] 0
Conv2d-93 [-1, 256, 52, 52] 294,912
BatchNorm2d-94 [-1, 256, 52, 52] 512
LeakyReLU-95 [-1, 256, 52, 52] 0
Convolutional-96 [-1, 256, 52, 52] 0
Residual-97 [-1, 256, 52, 52] 0
Conv2d-98 [-1, 128, 52, 52] 32,768
BatchNorm2d-99 [-1, 128, 52, 52] 256
LeakyReLU-100 [-1, 128, 52, 52] 0
Convolutional-101 [-1, 128, 52, 52] 0
Conv2d-102 [-1, 256, 52, 52] 294,912
BatchNorm2d-103 [-1, 256, 52, 52] 512
LeakyReLU-104 [-1, 256, 52, 52] 0
Convolutional-105 [-1, 256, 52, 52] 0
Residual-106 [-1, 256, 52, 52] 0
Conv2d-107 [-1, 128, 52, 52] 32,768
BatchNorm2d-108 [-1, 128, 52, 52] 256
LeakyReLU-109 [-1, 128, 52, 52] 0
Convolutional-110 [-1, 128, 52, 52] 0
Conv2d-111 [-1, 256, 52, 52] 294,912
BatchNorm2d-112 [-1, 256, 52, 52] 512
LeakyReLU-113 [-1, 256, 52, 52] 0
Convolutional-114 [-1, 256, 52, 52] 0
Residual-115 [-1, 256, 52, 52] 0
Conv2d-116 [-1, 512, 26, 26] 1,179,648
BatchNorm2d-117 [-1, 512, 26, 26] 1,024
LeakyReLU-118 [-1, 512, 26, 26] 0
Convolutional-119 [-1, 512, 26, 26] 0
Conv2d-120 [-1, 256, 26, 26] 131,072
BatchNorm2d-121 [-1, 256, 26, 26] 512
LeakyReLU-122 [-1, 256, 26, 26] 0
Convolutional-123 [-1, 256, 26, 26] 0
Conv2d-124 [-1, 512, 26, 26] 1,179,648
BatchNorm2d-125 [-1, 512, 26, 26] 1,024
LeakyReLU-126 [-1, 512, 26, 26] 0
Convolutional-127 [-1, 512, 26, 26] 0
Residual-128 [-1, 512, 26, 26] 0
Conv2d-129 [-1, 256, 26, 26] 131,072
BatchNorm2d-130 [-1, 256, 26, 26] 512
LeakyReLU-131 [-1, 256, 26, 26] 0
Convolutional-132 [-1, 256, 26, 26] 0
Conv2d-133 [-1, 512, 26, 26] 1,179,648
BatchNorm2d-134 [-1, 512, 26, 26] 1,024
LeakyReLU-135 [-1, 512, 26, 26] 0
Convolutional-136 [-1, 512, 26, 26] 0
Residual-137 [-1, 512, 26, 26] 0
Conv2d-138 [-1, 256, 26, 26] 131,072
BatchNorm2d-139 [-1, 256, 26, 26] 512
LeakyReLU-140 [-1, 256, 26, 26] 0
Convolutional-141 [-1, 256, 26, 26] 0
Conv2d-142 [-1, 512, 26, 26] 1,179,648
BatchNorm2d-143 [-1, 512, 26, 26] 1,024
LeakyReLU-144 [-1, 512, 26, 26] 0
Convolutional-145 [-1, 512, 26, 26] 0
Residual-146 [-1, 512, 26, 26] 0
Conv2d-147 [-1, 256, 26, 26] 131,072
BatchNorm2d-148 [-1, 256, 26, 26] 512
LeakyReLU-149 [-1, 256, 26, 26] 0
Convolutional-150 [-1, 256, 26, 26] 0
Conv2d-151 [-1, 512, 26, 26] 1,179,648
BatchNorm2d-152 [-1, 512, 26, 26] 1,024
LeakyReLU-153 [-1, 512, 26, 26] 0
Convolutional-154 [-1, 512, 26, 26] 0
Residual-155 [-1, 512, 26, 26] 0
Conv2d-156 [-1, 256, 26, 26] 131,072
BatchNorm2d-157 [-1, 256, 26, 26] 512
LeakyReLU-158 [-1, 256, 26, 26] 0
Convolutional-159 [-1, 256, 26, 26] 0
Conv2d-160 [-1, 512, 26, 26] 1,179,648
BatchNorm2d-161 [-1, 512, 26, 26] 1,024
LeakyReLU-162 [-1, 512, 26, 26] 0
Convolutional-163 [-1, 512, 26, 26] 0
Residual-164 [-1, 512, 26, 26] 0
Conv2d-165 [-1, 256, 26, 26] 131,072
BatchNorm2d-166 [-1, 256, 26, 26] 512
LeakyReLU-167 [-1, 256, 26, 26] 0
Convolutional-168 [-1, 256, 26, 26] 0
Conv2d-169 [-1, 512, 26, 26] 1,179,648
BatchNorm2d-170 [-1, 512, 26, 26] 1,024
LeakyReLU-171 [-1, 512, 26, 26] 0
Convolutional-172 [-1, 512, 26, 26] 0
Residual-173 [-1, 512, 26, 26] 0
Conv2d-174 [-1, 256, 26, 26] 131,072
BatchNorm2d-175 [-1, 256, 26, 26] 512
LeakyReLU-176 [-1, 256, 26, 26] 0
Convolutional-177 [-1, 256, 26, 26] 0
Conv2d-178 [-1, 512, 26, 26] 1,179,648
BatchNorm2d-179 [-1, 512, 26, 26] 1,024
LeakyReLU-180 [-1, 512, 26, 26] 0
Convolutional-181 [-1, 512, 26, 26] 0
Residual-182 [-1, 512, 26, 26] 0
Conv2d-183 [-1, 256, 26, 26] 131,072
BatchNorm2d-184 [-1, 256, 26, 26] 512
LeakyReLU-185 [-1, 256, 26, 26] 0
Convolutional-186 [-1, 256, 26, 26] 0
Conv2d-187 [-1, 512, 26, 26] 1,179,648
BatchNorm2d-188 [-1, 512, 26, 26] 1,024
LeakyReLU-189 [-1, 512, 26, 26] 0
Convolutional-190 [-1, 512, 26, 26] 0
Residual-191 [-1, 512, 26, 26] 0
Conv2d-192 [-1, 1024, 13, 13] 4,718,592
BatchNorm2d-193 [-1, 1024, 13, 13] 2,048
LeakyReLU-194 [-1, 1024, 13, 13] 0
Convolutional-195 [-1, 1024, 13, 13] 0
Conv2d-196 [-1, 512, 13, 13] 524,288
BatchNorm2d-197 [-1, 512, 13, 13] 1,024
LeakyReLU-198 [-1, 512, 13, 13] 0
Convolutional-199 [-1, 512, 13, 13] 0
Conv2d-200 [-1, 1024, 13, 13] 4,718,592
BatchNorm2d-201 [-1, 1024, 13, 13] 2,048
LeakyReLU-202 [-1, 1024, 13, 13] 0
Convolutional-203 [-1, 1024, 13, 13] 0
Residual-204 [-1, 1024, 13, 13] 0
Conv2d-205 [-1, 512, 13, 13] 524,288
BatchNorm2d-206 [-1, 512, 13, 13] 1,024
LeakyReLU-207 [-1, 512, 13, 13] 0
Convolutional-208 [-1, 512, 13, 13] 0
Conv2d-209 [-1, 1024, 13, 13] 4,718,592
BatchNorm2d-210 [-1, 1024, 13, 13] 2,048
LeakyReLU-211 [-1, 1024, 13, 13] 0
Convolutional-212 [-1, 1024, 13, 13] 0
Residual-213 [-1, 1024, 13, 13] 0
Conv2d-214 [-1, 512, 13, 13] 524,288
BatchNorm2d-215 [-1, 512, 13, 13] 1,024
LeakyReLU-216 [-1, 512, 13, 13] 0
Convolutional-217 [-1, 512, 13, 13] 0
Conv2d-218 [-1, 1024, 13, 13] 4,718,592
BatchNorm2d-219 [-1, 1024, 13, 13] 2,048
LeakyReLU-220 [-1, 1024, 13, 13] 0
Convolutional-221 [-1, 1024, 13, 13] 0
Residual-222 [-1, 1024, 13, 13] 0
Conv2d-223 [-1, 512, 13, 13] 524,288
BatchNorm2d-224 [-1, 512, 13, 13] 1,024
LeakyReLU-225 [-1, 512, 13, 13] 0
Convolutional-226 [-1, 512, 13, 13] 0
Conv2d-227 [-1, 1024, 13, 13] 4,718,592
BatchNorm2d-228 [-1, 1024, 13, 13] 2,048
LeakyReLU-229 [-1, 1024, 13, 13] 0
Convolutional-230 [-1, 1024, 13, 13] 0
Residual-231 [-1, 1024, 13, 13] 0
Darknet-232 [[-1, 256, 52, 52], [-1, 512, 26, 26], [-1, 1024, 13, 13]] 0
Conv2d-233 [-1, 512, 15, 15] 524,288
BatchNorm2d-234 [-1, 512, 15, 15] 1,024
LeakyReLU-235 [-1, 512, 15, 15] 0
Convolutional-236 [-1, 512, 15, 15] 0
Conv2d-237 [-1, 1024, 15, 15] 4,718,592
BatchNorm2d-238 [-1, 1024, 15, 15] 2,048
LeakyReLU-239 [-1, 1024, 15, 15] 0
Convolutional-240 [-1, 1024, 15, 15] 0
Conv2d-241 [-1, 512, 17, 17] 524,288
BatchNorm2d-242 [-1, 512, 17, 17] 1,024
LeakyReLU-243 [-1, 512, 17, 17] 0
Convolutional-244 [-1, 512, 17, 17] 0
Conv2d-245 [-1, 1024, 17, 17] 4,718,592
BatchNorm2d-246 [-1, 1024, 17, 17] 2,048
LeakyReLU-247 [-1, 1024, 17, 17] 0
Convolutional-248 [-1, 1024, 17, 17] 0
Conv2d-249 [-1, 1024, 19, 19] 1,048,576
BatchNorm2d-250 [-1, 1024, 19, 19] 2,048
LeakyReLU-251 [-1, 1024, 19, 19] 0
Convolutional-252 [-1, 1024, 19, 19] 0
DBLx5-253 [-1, 1024, 13, 13] 0
Conv2d-254 [-1, 512, 13, 13] 524,288
BatchNorm2d-255 [-1, 512, 13, 13] 1,024
LeakyReLU-256 [-1, 512, 13, 13] 0
Convolutional-257 [-1, 512, 13, 13] 0
ConvTranspose2d-258 [-1, 512, 26, 26] 1,049,088
Conv2d-259 [-1, 512, 26, 26] 4,718,592
BatchNorm2d-260 [-1, 512, 26, 26] 1,024
LeakyReLU-261 [-1, 512, 26, 26] 0
Convolutional-262 [-1, 512, 26, 26] 0
FPN-263 [-1, 512, 26, 26] 0
Conv2d-264 [-1, 256, 28, 28] 131,072
BatchNorm2d-265 [-1, 256, 28, 28] 512
LeakyReLU-266 [-1, 256, 28, 28] 0
Convolutional-267 [-1, 256, 28, 28] 0
Conv2d-268 [-1, 512, 28, 28] 1,179,648
BatchNorm2d-269 [-1, 512, 28, 28] 1,024
LeakyReLU-270 [-1, 512, 28, 28] 0
Convolutional-271 [-1, 512, 28, 28] 0
Conv2d-272 [-1, 256, 30, 30] 131,072
BatchNorm2d-273 [-1, 256, 30, 30] 512
LeakyReLU-274 [-1, 256, 30, 30] 0
Convolutional-275 [-1, 256, 30, 30] 0
Conv2d-276 [-1, 512, 30, 30] 1,179,648
BatchNorm2d-277 [-1, 512, 30, 30] 1,024
LeakyReLU-278 [-1, 512, 30, 30] 0
Convolutional-279 [-1, 512, 30, 30] 0
Conv2d-280 [-1, 512, 32, 32] 262,144
BatchNorm2d-281 [-1, 512, 32, 32] 1,024
LeakyReLU-282 [-1, 512, 32, 32] 0
Convolutional-283 [-1, 512, 32, 32] 0
DBLx5-284 [-1, 512, 26, 26] 0
Conv2d-285 [-1, 256, 26, 26] 131,072
BatchNorm2d-286 [-1, 256, 26, 26] 512
LeakyReLU-287 [-1, 256, 26, 26] 0
Convolutional-288 [-1, 256, 26, 26] 0
ConvTranspose2d-289 [-1, 256, 52, 52] 262,400
Conv2d-290 [-1, 256, 52, 52] 1,179,648
BatchNorm2d-291 [-1, 256, 52, 52] 512
LeakyReLU-292 [-1, 256, 52, 52] 0
Convolutional-293 [-1, 256, 52, 52] 0
FPN-294 [-1, 256, 52, 52] 0
Conv2d-295 [-1, 512, 13, 13] 524,288
BatchNorm2d-296 [-1, 512, 13, 13] 1,024
LeakyReLU-297 [-1, 512, 13, 13] 0
Convolutional-298 [-1, 512, 13, 13] 0
Conv2d-299 [-1, 1024, 13, 13] 4,718,592
BatchNorm2d-300 [-1, 1024, 13, 13] 2,048
LeakyReLU-301 [-1, 1024, 13, 13] 0
Convolutional-302 [-1, 1024, 13, 13] 0
Conv2d-303 [-1, 18, 13, 13] 18,450
Detection-304 [-1, 18, 13, 13] 0
Conv2d-305 [-1, 256, 26, 26] 131,072
BatchNorm2d-306 [-1, 256, 26, 26] 512
LeakyReLU-307 [-1, 256, 26, 26] 0
Convolutional-308 [-1, 256, 26, 26] 0
Conv2d-309 [-1, 512, 26, 26] 1,179,648
BatchNorm2d-310 [-1, 512, 26, 26] 1,024
LeakyReLU-311 [-1, 512, 26, 26] 0
Convolutional-312 [-1, 512, 26, 26] 0
Conv2d-313 [-1, 18, 26, 26] 9,234
Detection-314 [-1, 18, 26, 26] 0
Conv2d-315 [-1, 128, 52, 52] 32,768
BatchNorm2d-316 [-1, 128, 52, 52] 256
LeakyReLU-317 [-1, 128, 52, 52] 0
Convolutional-318 [-1, 128, 52, 52] 0
Conv2d-319 [-1, 256, 52, 52] 294,912
BatchNorm2d-320 [-1, 256, 52, 52] 512
LeakyReLU-321 [-1, 256, 52, 52] 0
Convolutional-322 [-1, 256, 52, 52] 0
Conv2d-323 [-1, 18, 52, 52] 4,626
Detection-324 [-1, 18, 52, 52] 0
================================================================
Total params: 69,802,262
Trainable params: 69,802,262
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 1.98
Forward/backward pass size (MB): 316329755939.75
Params size (MB): 266.27
Estimated Total Size (MB): 316329756208.00
--------------------------------------------------------------------------
我不明白问题出在哪里,模型不可能有这么大。训练时的报错信息:
OutOfMemoryError:CUDA 内存不足。尝试分配 170.00 MiB。 GPU 0 的总容量为 14.75 GiB,其中 7.06 MiB 可用。进程 5914 有 14.74 GiB 内存正在使用。在已分配的内存中,14.53 GiB 由 PyTorch 分配,88.94 MiB 由 PyTorch 保留但未分配。如果保留但未分配的内存很大,请尝试设置 PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True 以避免碎片。
我不明白为什么模型这么大。
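我尝试重现该错误并遇到了同样的问题。这个巨大的数字来自 torchsummary 本身:对于返回多个张量的模块(这里是 Darknet),它会把所有输出形状中的数值全部相乘(256·52·52 × 512·26·26 × 1024·13·13),因此估算出的前向/反向内存大得离谱,并不代表模型的真实占用。您可能需要到 torchsummary 的仓库提交一个 issue。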
但是,我可以使用具有 8GB VRAM(批量大小为 1)的 GPU 来运行模型的前向和后向传递,没有任何问题。
如果您仍然遇到内存问题,您应该尝试减少批量大小。
我冒昧地清理了你的代码:
import torch
from torch import nn, Tensor
class Convolutional(nn.Module):
    """Bias-free Conv2d followed by BatchNorm2d and LeakyReLU(0.1).

    Args:
        in_channels: channels of the input tensor.
        out_channels: channels produced by the convolution.
        kernel_size: side length of the square kernel.
        stride: convolution stride (default 1).
        padding: zero padding on each side (default 0); pass 1 for a
            size-preserving 3x3 convolution at stride 1.
    """

    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, padding: int = 0) -> None:
        super().__init__()
        layers = [
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
                bias=False,
            ),
            nn.BatchNorm2d(num_features=out_channels),
            nn.LeakyReLU(negative_slope=0.1),
        ]
        self.stack = nn.Sequential(*layers)

    def forward(self, x: Tensor) -> Tensor:
        """Apply convolution, batch norm and activation to ``x``."""
        activated = self.stack(x)
        return activated
class Detection(nn.Module):
    """Detection head producing ``5 * B + C`` channels per spatial location.

    Layout: 1x1 DBL (halves the channels) -> 3x3 DBL (restores them) ->
    plain 1x1 Conv2d. The spatial size of the input is preserved.

    Args:
        in_channels: channels of the incoming feature map.
        C: added once to the output channel count (presumably the number of
            classes -- confirm against the loss function).
        B: multiplied by 5 in the output channel count (presumably boxes
            per cell -- confirm against the loss function).
    """

    def __init__(self, in_channels: int, C: int, B: int) -> None:
        super().__init__()
        hidden_channels = in_channels // 2
        prediction_channels = 5 * B + C
        reduce = Convolutional(in_channels, hidden_channels, kernel_size=1, padding=0)
        expand = Convolutional(hidden_channels, in_channels, kernel_size=3, padding=1)
        project = nn.Conv2d(in_channels, prediction_channels, kernel_size=1)
        self.stack = nn.Sequential(reduce, expand, project)

    def forward(self, x: Tensor) -> Tensor:
        """Return the raw prediction map for ``x``."""
        predictions = self.stack(x)
        return predictions
class FPN(nn.Module):
    """Merge a coarse feature map with a finer skip connection.

    Steps: 1x1 DBL down to ``out_channels`` -> 2x transposed-convolution
    upsampling -> channel-wise concatenation with ``skip`` -> 3x3 DBL back to
    ``out_channels``. ``skip`` has to provide ``out_channels`` channels
    because the last unit expects ``2 * out_channels`` inputs.
    """

    def __init__(self, in_channels: int, out_channels: int) -> None:
        super().__init__()
        self.conv1 = Convolutional(in_channels, out_channels, kernel_size=1, padding=0)
        self.up = nn.ConvTranspose2d(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=2,
            stride=2,
        )
        self.conv3 = Convolutional(out_channels * 2, out_channels, kernel_size=3, padding=1)

    def forward(self, x: Tensor, skip: Tensor) -> Tensor:
        """Return the fusion of upsampled ``x`` and ``skip``."""
        upsampled = self.up(self.conv1(x))
        merged = torch.cat((upsampled, skip), dim=1)
        fused = self.conv3(merged)
        return fused
class DBLx5(nn.Module):
    """Stack of five DBL units with kernels 1x1, 3x3, 1x1, 3x3, 1x1.

    The channel count alternates between ``in_channels // 2`` and
    ``in_channels`` and ends at ``in_channels``; the 3x3 units use
    ``padding=1`` so the spatial size is unchanged.
    """

    def __init__(self, in_channels: int) -> None:
        super().__init__()
        half = in_channels // 2
        units = [
            Convolutional(in_channels, half, kernel_size=1, padding=0),
            Convolutional(half, in_channels, kernel_size=3, padding=1),
            Convolutional(in_channels, half, kernel_size=1, padding=0),
            Convolutional(half, in_channels, kernel_size=3, padding=1),
            Convolutional(in_channels, in_channels, kernel_size=1, padding=0),
        ]
        self.stack = nn.Sequential(*units)

    def forward(self, x: Tensor) -> Tensor:
        """Return ``x`` after the five units."""
        refined = self.stack(x)
        return refined
class Residual(nn.Module):
    """Residual unit: a 1x1 -> 3x3 DBL branch added back onto its input.

    The branch halves and then restores the channel count, so input and
    output shapes are identical.
    """

    def __init__(self, in_channels: int) -> None:
        super().__init__()
        bottleneck = in_channels // 2
        squeeze = Convolutional(in_channels, bottleneck, kernel_size=1, padding=0)
        expand = Convolutional(bottleneck, in_channels, kernel_size=3, padding=1)
        self.stack = nn.Sequential(squeeze, expand)

    def forward(self, x: Tensor) -> Tensor:
        """Return the branch output plus ``x``."""
        branch = self.stack(x)
        return branch + x
class Darknet(nn.Module):
    """Three-stage backbone built from DBL and residual units.

    Counting the stride-2 convolutions, the stages reduce the input
    resolution by 8, 16 and 32 and output 256, 512 and 1024 channels.
    """

    def __init__(self) -> None:
        super().__init__()

        # Stage 1: stem plus 1, 2 and 8 residual units around three downsamplings.
        stage1 = [
            Convolutional(3, 32, kernel_size=3, padding=1),
            Convolutional(32, 64, kernel_size=3, stride=2, padding=1),
            Residual(64),
            Convolutional(64, 128, kernel_size=3, stride=2, padding=1),
        ]
        stage1 += [Residual(128) for _ in range(2)]
        stage1.append(Convolutional(128, 256, kernel_size=3, stride=2, padding=1))
        stage1 += [Residual(256) for _ in range(8)]
        self.stack1 = nn.Sequential(*stage1)

        # Stage 2: downsample once, then 8 residual units.
        stage2 = [Convolutional(256, 512, kernel_size=3, stride=2, padding=1)]
        stage2 += [Residual(512) for _ in range(8)]
        self.stack2 = nn.Sequential(*stage2)

        # Stage 3: downsample once, then 4 residual units.
        stage3 = [Convolutional(512, 1024, kernel_size=3, stride=2, padding=1)]
        stage3 += [Residual(1024) for _ in range(4)]
        self.stack3 = nn.Sequential(*stage3)

    def forward(self, x: Tensor) -> tuple[Tensor, Tensor, Tensor]:
        """Return the feature maps of stage 1, stage 2 and stage 3."""
        fine = self.stack1(x)
        medium = self.stack2(fine)
        coarse = self.stack3(medium)
        return fine, medium, coarse
class YOLOv3(nn.Module):
    """Darknet backbone, top-down feature fusion and three detection heads.

    Args:
        C: passed to each ``Detection`` head.
        B: passed to each ``Detection`` head (default 2).

    ``forward`` returns one prediction map per scale, ordered from the
    coarsest backbone level to the finest.
    """

    def __init__(self, C: int, B: int = 2) -> None:
        super().__init__()
        self.darknet = Darknet()
        self.dbl5_1 = DBLx5(1024)
        self.dbl5_2 = DBLx5(512)
        self.detection_1 = Detection(1024, C, B)
        self.detection_2 = Detection(512, C, B)
        self.detection_3 = Detection(256, C, B)
        self.fpn_1 = FPN(1024, 512)
        self.fpn_2 = FPN(512, 256)

    def forward(self, x: Tensor) -> tuple[Tensor, Tensor, Tensor]:
        """Compute the three prediction maps for the image batch ``x``."""
        # Backbone outputs with 256, 512 and 1024 channels respectively.
        feat_256, feat_512, feat_1024 = self.darknet(x)

        # Top-down path: refine the deepest features, then fuse them with the
        # shallower backbone levels one step at a time.
        neck_1024 = self.dbl5_1(feat_1024)
        neck_512 = self.dbl5_2(self.fpn_1(neck_1024, feat_512))
        neck_256 = self.fpn_2(neck_512, feat_256)

        # Heads are evaluated in the same order as their attribute numbering.
        head_1 = self.detection_1(neck_1024)
        head_2 = self.detection_2(neck_512)
        head_3 = self.detection_3(neck_256)
        return head_1, head_2, head_3