“前向/反向传播大小(Forward/backward pass size)”对于 PyTorch 模型(YOLOv3)来说太大了

问题描述 投票:0回答:1

我正在 Pytorch 中编写 Yolov3。 架构:https://i.sstatic.net/mncjfiDs.png

代码:

class Convolutional(nn.Module):  # DBL
  """Conv2d -> BatchNorm2d -> LeakyReLU(0.1) building block.

  Args:
    in_channels: Channel count of the input feature map.
    out_channels: Channel count produced by the convolution.
    kernel_size: Side length of the square convolution kernel.
    stride: Convolution stride.
    padding: Zero padding handed to the convolution. The default of 1
      preserves the spatial size only for a 3x3 kernel at stride 1;
      callers using a 1x1 kernel need to pass ``padding=0``.
  """

  def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, padding=1):
    super().__init__()
    # The convolution is created without a bias term; BatchNorm2d follows it directly.
    conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, bias=False)
    norm = nn.BatchNorm2d(out_channels)
    activation = nn.LeakyReLU(0.1)
    self._stack = nn.Sequential(conv, norm, activation)

  def forward(self, x):
    """Run ``x`` through convolution, batch normalisation and LeakyReLU."""
    return self._stack(x)

class Detection(nn.Module):
  """Detection head producing the raw prediction map for one scale.

  The head is a 1x1 bottleneck that halves the channels, a 3x3 convolution
  that restores them, and a plain 1x1 ``nn.Conv2d`` emitting
  ``B * 5 + C`` channels.

  Args:
    in_channels: Channel count of the incoming feature map.
    C: Added once to the output channel count (presumably the number of
      classes -- confirm against the loss function).
    B: Multiplied by 5 in the output channel count (presumably the number
      of boxes predicted per grid cell -- confirm against the loss).
  """

  def __init__(self, in_channels: int, C, B):
    super().__init__()
    out_channels = in_channels // 2
    self.stack = nn.Sequential(
        Convolutional(in_channels=in_channels, out_channels=out_channels, kernel_size=1, padding=0),
        # padding=1 is spelled out so the 3x3 conv keeps the spatial size
        # regardless of Convolutional's default.
        Convolutional(in_channels=out_channels, out_channels=in_channels, kernel_size=3, padding=1),
        # NOTE(review): the YOLOv3 paper sizes this layer as B * (5 + C);
        # B * 5 + C is the YOLOv1 layout. Confirm which one the loss expects.
        nn.Conv2d(in_channels=in_channels, out_channels=((B * 5) + C), kernel_size=1, padding=0)
    )

  def forward(self, x):
    """Return the prediction map for feature map ``x``."""
    # The former debug print of the output shape was removed: it wrote to
    # stdout on every forward pass, including every training step.
    return self.stack(x)

class FPN(nn.Module):
  """Upsample a coarse feature map and fuse it with a skip connection.

  Args:
    in_channels: Channel count of the coarse input ``x``.
    out_channels: Channel count after the 1x1 reduction and of the result.
      The fusion convolution is built for ``2 * out_channels`` inputs, so
      ``skip`` must also carry ``out_channels`` channels.
  """

  def __init__(self, in_channels, out_channels):
    super().__init__()
    fused_channels = out_channels * 2
    self.conv1 = Convolutional(in_channels=in_channels, out_channels=out_channels, kernel_size=1, padding=0)
    # Transposed convolution with kernel 2 / stride 2 doubles height and width.
    self.up = nn.ConvTranspose2d(out_channels, out_channels, kernel_size=2, stride=2)
    self.conv3 = Convolutional(in_channels=fused_channels, out_channels=out_channels, kernel_size=3)

  def forward(self, x, skip):
    """Reduce and upsample ``x``, concatenate with ``skip`` on dim 1, then fuse."""
    upsampled = self.up(self.conv1(x))
    merged = torch.cat([upsampled, skip], dim=1)
    return self.conv3(merged)


class DBLx5(nn.Module):
  """Five stacked Convolutional units alternating 1x1 and 3x3 kernels.

  Channels go ``in -> in/2 -> in -> in/2 -> in -> in`` and the spatial size
  of the input is preserved.

  Args:
    in_channels: Channel count of the input; the output has the same count.
  """

  def __init__(self, in_channels):
    super().__init__()
    out_channels = in_channels // 2
    # Padding is given explicitly for every layer. Convolutional defaults to
    # padding=1, which is right for 3x3 kernels but makes each 1x1 layer grow
    # the feature map by 2 pixels per side pair (13 -> 15 -> 17 -> 19).
    self.stack = nn.Sequential(
        Convolutional(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0),
        Convolutional(in_channels=out_channels, out_channels=in_channels, kernel_size=3, stride=1, padding=1),
        Convolutional(in_channels=in_channels, out_channels=out_channels, kernel_size=1, stride=1, padding=0),
        Convolutional(in_channels=out_channels, out_channels=in_channels, kernel_size=3, stride=1, padding=1),
        Convolutional(in_channels=in_channels, out_channels=in_channels, kernel_size=1, stride=1, padding=0)
    )

  def forward(self, x):
    """Return the output of the five-layer stack applied to ``x``."""
    # Previously this returned the untouched input ``x`` and discarded the
    # stack's result, so the layers were computed but never used.
    return self.stack(x)

class Residual(nn.Module):  # ResUnit
  """Residual unit: 1x1 bottleneck plus 3x3 expansion, added to the input.

  Args:
    in_channels: Channel count of the input; the output has the same count
      so that it can be summed with the input.
  """

  def __init__(self, in_channels: int):
    super().__init__()
    bottleneck = in_channels // 2
    squeeze = Convolutional(in_channels=in_channels, out_channels=bottleneck, kernel_size=1, padding=0)
    expand = Convolutional(in_channels=bottleneck, out_channels=in_channels, kernel_size=3, padding=1)
    self._conv_stack = nn.Sequential(squeeze, expand)

  def forward(self, x):
    """Return ``x`` plus the output of the two-layer branch applied to ``x``."""
    return x + self._conv_stack(x)


class Darknet(nn.Module):
  """Backbone made of three sequential stages whose outputs are all returned.

  Stage 1 ends at 256 channels, stage 2 at 512 and stage 3 at 1024; every
  stride-2 convolution halves the spatial resolution.
  """

  def __init__(self):
    super().__init__()
    # Stage 1: stem, then 1, 2 and 8 residual units at 64, 128 and 256 channels.
    stage1 = [
        Convolutional(in_channels=3, out_channels=32, kernel_size=3),
        Convolutional(in_channels=32, out_channels=64, kernel_size=3, stride=2),
        Residual(64),
        Convolutional(in_channels=64, out_channels=128, kernel_size=3, stride=2),
        *[Residual(128) for _ in range(2)],
        Convolutional(in_channels=128, out_channels=256, kernel_size=3, stride=2),
        *[Residual(256) for _ in range(8)],
    ]

    # Stage 2: downsample to 512 channels, then 8 residual units.
    stage2 = [Convolutional(in_channels=256, out_channels=512, kernel_size=3, stride=2)]
    stage2 += [Residual(512) for _ in range(8)]

    # Stage 3: downsample to 1024 channels, then 4 residual units.
    stage3 = [Convolutional(in_channels=512, out_channels=1024, kernel_size=3, stride=2)]
    stage3 += [Residual(1024) for _ in range(4)]

    self.stack_list1 = nn.Sequential(*stage1)
    self.stack_list2 = nn.Sequential(*stage2)
    self.stack_list3 = nn.Sequential(*stage3)

  def forward(self, x):
    """Return the outputs of stage 1, stage 2 and stage 3, in that order."""
    stage1_out = self.stack_list1(x)
    stage2_out = self.stack_list2(stage1_out)
    stage3_out = self.stack_list3(stage2_out)
    return stage1_out, stage2_out, stage3_out

class YOLOv3(nn.Module):
  """Three-scale detector: Darknet backbone, two FPN merges, three heads.

  Args:
    C: Forwarded to every Detection head.
    B: Forwarded to every Detection head; defaults to 2.
  """

  def __init__(self, C, B=2):
    super().__init__()
    self.darknet = Darknet()
    self.dbl5_1 = DBLx5(in_channels=1024)
    self.dbl5_2 = DBLx5(in_channels=512)
    self.detection_1 = Detection(in_channels=1024, C=C, B=B)
    self.detection_2 = Detection(in_channels=512, C=C, B=B)
    self.detection_3 = Detection(in_channels=256, C=C, B=B)
    self.fpn_1 = FPN(in_channels=1024, out_channels=512)
    self.fpn_2 = FPN(in_channels=512, out_channels=256)

  def forward(self, x):
    """Return the prediction maps of the coarse, medium and fine scales."""
    # Backbone features carry 256, 512 and 1024 channels respectively.
    feat_256, feat_512, feat_1024 = self.darknet(x)

    coarse = self.dbl5_1(feat_1024)
    # Each FPN step upsamples the coarser map and fuses it with the next
    # backbone feature map.
    medium = self.dbl5_2(self.fpn_1(coarse, feat_512))
    fine = self.fpn_2(medium, feat_256)

    pred_coarse = self.detection_1(coarse)
    pred_medium = self.detection_2(medium)
    pred_fine = self.detection_3(fine)
    return pred_coarse, pred_medium, pred_fine

torchsummary 给出了大得不切实际的模型尺寸。在训练中运行模型获取预测时,会报 CUDA 内存不足错误。我认为问题出在 Darknet-53 上:单独查看时,它的大小是正常的,但把它放进 YOLO 之后就会出现这个巨大的数字。(输入图像尺寸为 3x416x416。)摘要输出:

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 32, 416, 416]             864
       BatchNorm2d-2         [-1, 32, 416, 416]              64
         LeakyReLU-3         [-1, 32, 416, 416]               0
     Convolutional-4         [-1, 32, 416, 416]               0
            Conv2d-5         [-1, 64, 208, 208]          18,432
       BatchNorm2d-6         [-1, 64, 208, 208]             128
         LeakyReLU-7         [-1, 64, 208, 208]               0
     Convolutional-8         [-1, 64, 208, 208]               0
            Conv2d-9         [-1, 32, 208, 208]           2,048
      BatchNorm2d-10         [-1, 32, 208, 208]              64
        LeakyReLU-11         [-1, 32, 208, 208]               0
    Convolutional-12         [-1, 32, 208, 208]               0
           Conv2d-13         [-1, 64, 208, 208]          18,432
      BatchNorm2d-14         [-1, 64, 208, 208]             128
        LeakyReLU-15         [-1, 64, 208, 208]               0
    Convolutional-16         [-1, 64, 208, 208]               0
         Residual-17         [-1, 64, 208, 208]               0
           Conv2d-18        [-1, 128, 104, 104]          73,728
      BatchNorm2d-19        [-1, 128, 104, 104]             256
        LeakyReLU-20        [-1, 128, 104, 104]               0
    Convolutional-21        [-1, 128, 104, 104]               0
           Conv2d-22         [-1, 64, 104, 104]           8,192
      BatchNorm2d-23         [-1, 64, 104, 104]             128
        LeakyReLU-24         [-1, 64, 104, 104]               0
    Convolutional-25         [-1, 64, 104, 104]               0
           Conv2d-26        [-1, 128, 104, 104]          73,728
      BatchNorm2d-27        [-1, 128, 104, 104]             256
        LeakyReLU-28        [-1, 128, 104, 104]               0
    Convolutional-29        [-1, 128, 104, 104]               0
         Residual-30        [-1, 128, 104, 104]               0
           Conv2d-31         [-1, 64, 104, 104]           8,192
      BatchNorm2d-32         [-1, 64, 104, 104]             128
        LeakyReLU-33         [-1, 64, 104, 104]               0
    Convolutional-34         [-1, 64, 104, 104]               0
           Conv2d-35        [-1, 128, 104, 104]          73,728
      BatchNorm2d-36        [-1, 128, 104, 104]             256
        LeakyReLU-37        [-1, 128, 104, 104]               0
    Convolutional-38        [-1, 128, 104, 104]               0
         Residual-39        [-1, 128, 104, 104]               0
           Conv2d-40          [-1, 256, 52, 52]         294,912
      BatchNorm2d-41          [-1, 256, 52, 52]             512
        LeakyReLU-42          [-1, 256, 52, 52]               0
    Convolutional-43          [-1, 256, 52, 52]               0
           Conv2d-44          [-1, 128, 52, 52]          32,768
      BatchNorm2d-45          [-1, 128, 52, 52]             256
        LeakyReLU-46          [-1, 128, 52, 52]               0
    Convolutional-47          [-1, 128, 52, 52]               0
           Conv2d-48          [-1, 256, 52, 52]         294,912
      BatchNorm2d-49          [-1, 256, 52, 52]             512
        LeakyReLU-50          [-1, 256, 52, 52]               0
    Convolutional-51          [-1, 256, 52, 52]               0
         Residual-52          [-1, 256, 52, 52]               0
           Conv2d-53          [-1, 128, 52, 52]          32,768
      BatchNorm2d-54          [-1, 128, 52, 52]             256
        LeakyReLU-55          [-1, 128, 52, 52]               0
    Convolutional-56          [-1, 128, 52, 52]               0
           Conv2d-57          [-1, 256, 52, 52]         294,912
      BatchNorm2d-58          [-1, 256, 52, 52]             512
        LeakyReLU-59          [-1, 256, 52, 52]               0
    Convolutional-60          [-1, 256, 52, 52]               0
         Residual-61          [-1, 256, 52, 52]               0
           Conv2d-62          [-1, 128, 52, 52]          32,768
      BatchNorm2d-63          [-1, 128, 52, 52]             256
        LeakyReLU-64          [-1, 128, 52, 52]               0
    Convolutional-65          [-1, 128, 52, 52]               0
           Conv2d-66          [-1, 256, 52, 52]         294,912
      BatchNorm2d-67          [-1, 256, 52, 52]             512
        LeakyReLU-68          [-1, 256, 52, 52]               0
    Convolutional-69          [-1, 256, 52, 52]               0
         Residual-70          [-1, 256, 52, 52]               0
           Conv2d-71          [-1, 128, 52, 52]          32,768
      BatchNorm2d-72          [-1, 128, 52, 52]             256
        LeakyReLU-73          [-1, 128, 52, 52]               0
    Convolutional-74          [-1, 128, 52, 52]               0
           Conv2d-75          [-1, 256, 52, 52]         294,912
      BatchNorm2d-76          [-1, 256, 52, 52]             512
        LeakyReLU-77          [-1, 256, 52, 52]               0
    Convolutional-78          [-1, 256, 52, 52]               0
         Residual-79          [-1, 256, 52, 52]               0
           Conv2d-80          [-1, 128, 52, 52]          32,768
      BatchNorm2d-81          [-1, 128, 52, 52]             256
        LeakyReLU-82          [-1, 128, 52, 52]               0
    Convolutional-83          [-1, 128, 52, 52]               0
           Conv2d-84          [-1, 256, 52, 52]         294,912
      BatchNorm2d-85          [-1, 256, 52, 52]             512
        LeakyReLU-86          [-1, 256, 52, 52]               0
    Convolutional-87          [-1, 256, 52, 52]               0
         Residual-88          [-1, 256, 52, 52]               0
           Conv2d-89          [-1, 128, 52, 52]          32,768
      BatchNorm2d-90          [-1, 128, 52, 52]             256
        LeakyReLU-91          [-1, 128, 52, 52]               0
    Convolutional-92          [-1, 128, 52, 52]               0
           Conv2d-93          [-1, 256, 52, 52]         294,912
      BatchNorm2d-94          [-1, 256, 52, 52]             512
        LeakyReLU-95          [-1, 256, 52, 52]               0
    Convolutional-96          [-1, 256, 52, 52]               0
         Residual-97          [-1, 256, 52, 52]               0
           Conv2d-98          [-1, 128, 52, 52]          32,768
      BatchNorm2d-99          [-1, 128, 52, 52]             256
       LeakyReLU-100          [-1, 128, 52, 52]               0
   Convolutional-101          [-1, 128, 52, 52]               0
          Conv2d-102          [-1, 256, 52, 52]         294,912
     BatchNorm2d-103          [-1, 256, 52, 52]             512
       LeakyReLU-104          [-1, 256, 52, 52]               0
   Convolutional-105          [-1, 256, 52, 52]               0
        Residual-106          [-1, 256, 52, 52]               0
          Conv2d-107          [-1, 128, 52, 52]          32,768
     BatchNorm2d-108          [-1, 128, 52, 52]             256
       LeakyReLU-109          [-1, 128, 52, 52]               0
   Convolutional-110          [-1, 128, 52, 52]               0
          Conv2d-111          [-1, 256, 52, 52]         294,912
     BatchNorm2d-112          [-1, 256, 52, 52]             512
       LeakyReLU-113          [-1, 256, 52, 52]               0
   Convolutional-114          [-1, 256, 52, 52]               0
        Residual-115          [-1, 256, 52, 52]               0
          Conv2d-116          [-1, 512, 26, 26]       1,179,648
     BatchNorm2d-117          [-1, 512, 26, 26]           1,024
       LeakyReLU-118          [-1, 512, 26, 26]               0
   Convolutional-119          [-1, 512, 26, 26]               0
          Conv2d-120          [-1, 256, 26, 26]         131,072
     BatchNorm2d-121          [-1, 256, 26, 26]             512
       LeakyReLU-122          [-1, 256, 26, 26]               0
   Convolutional-123          [-1, 256, 26, 26]               0
          Conv2d-124          [-1, 512, 26, 26]       1,179,648
     BatchNorm2d-125          [-1, 512, 26, 26]           1,024
       LeakyReLU-126          [-1, 512, 26, 26]               0
   Convolutional-127          [-1, 512, 26, 26]               0
        Residual-128          [-1, 512, 26, 26]               0
          Conv2d-129          [-1, 256, 26, 26]         131,072
     BatchNorm2d-130          [-1, 256, 26, 26]             512
       LeakyReLU-131          [-1, 256, 26, 26]               0
   Convolutional-132          [-1, 256, 26, 26]               0
          Conv2d-133          [-1, 512, 26, 26]       1,179,648
     BatchNorm2d-134          [-1, 512, 26, 26]           1,024
       LeakyReLU-135          [-1, 512, 26, 26]               0
   Convolutional-136          [-1, 512, 26, 26]               0
        Residual-137          [-1, 512, 26, 26]               0
          Conv2d-138          [-1, 256, 26, 26]         131,072
     BatchNorm2d-139          [-1, 256, 26, 26]             512
       LeakyReLU-140          [-1, 256, 26, 26]               0
   Convolutional-141          [-1, 256, 26, 26]               0
          Conv2d-142          [-1, 512, 26, 26]       1,179,648
     BatchNorm2d-143          [-1, 512, 26, 26]           1,024
       LeakyReLU-144          [-1, 512, 26, 26]               0
   Convolutional-145          [-1, 512, 26, 26]               0
        Residual-146          [-1, 512, 26, 26]               0
          Conv2d-147          [-1, 256, 26, 26]         131,072
     BatchNorm2d-148          [-1, 256, 26, 26]             512
       LeakyReLU-149          [-1, 256, 26, 26]               0
   Convolutional-150          [-1, 256, 26, 26]               0
          Conv2d-151          [-1, 512, 26, 26]       1,179,648
     BatchNorm2d-152          [-1, 512, 26, 26]           1,024
       LeakyReLU-153          [-1, 512, 26, 26]               0
   Convolutional-154          [-1, 512, 26, 26]               0
        Residual-155          [-1, 512, 26, 26]               0
          Conv2d-156          [-1, 256, 26, 26]         131,072
     BatchNorm2d-157          [-1, 256, 26, 26]             512
       LeakyReLU-158          [-1, 256, 26, 26]               0
   Convolutional-159          [-1, 256, 26, 26]               0
          Conv2d-160          [-1, 512, 26, 26]       1,179,648
     BatchNorm2d-161          [-1, 512, 26, 26]           1,024
       LeakyReLU-162          [-1, 512, 26, 26]               0
   Convolutional-163          [-1, 512, 26, 26]               0
        Residual-164          [-1, 512, 26, 26]               0
          Conv2d-165          [-1, 256, 26, 26]         131,072
     BatchNorm2d-166          [-1, 256, 26, 26]             512
       LeakyReLU-167          [-1, 256, 26, 26]               0
   Convolutional-168          [-1, 256, 26, 26]               0
          Conv2d-169          [-1, 512, 26, 26]       1,179,648
     BatchNorm2d-170          [-1, 512, 26, 26]           1,024
       LeakyReLU-171          [-1, 512, 26, 26]               0
   Convolutional-172          [-1, 512, 26, 26]               0
        Residual-173          [-1, 512, 26, 26]               0
          Conv2d-174          [-1, 256, 26, 26]         131,072
     BatchNorm2d-175          [-1, 256, 26, 26]             512
       LeakyReLU-176          [-1, 256, 26, 26]               0
   Convolutional-177          [-1, 256, 26, 26]               0
          Conv2d-178          [-1, 512, 26, 26]       1,179,648
     BatchNorm2d-179          [-1, 512, 26, 26]           1,024
       LeakyReLU-180          [-1, 512, 26, 26]               0
   Convolutional-181          [-1, 512, 26, 26]               0
        Residual-182          [-1, 512, 26, 26]               0
          Conv2d-183          [-1, 256, 26, 26]         131,072
     BatchNorm2d-184          [-1, 256, 26, 26]             512
       LeakyReLU-185          [-1, 256, 26, 26]               0
   Convolutional-186          [-1, 256, 26, 26]               0
          Conv2d-187          [-1, 512, 26, 26]       1,179,648
     BatchNorm2d-188          [-1, 512, 26, 26]           1,024
       LeakyReLU-189          [-1, 512, 26, 26]               0
   Convolutional-190          [-1, 512, 26, 26]               0
        Residual-191          [-1, 512, 26, 26]               0
          Conv2d-192         [-1, 1024, 13, 13]       4,718,592
     BatchNorm2d-193         [-1, 1024, 13, 13]           2,048
       LeakyReLU-194         [-1, 1024, 13, 13]               0
   Convolutional-195         [-1, 1024, 13, 13]               0
          Conv2d-196          [-1, 512, 13, 13]         524,288
     BatchNorm2d-197          [-1, 512, 13, 13]           1,024
       LeakyReLU-198          [-1, 512, 13, 13]               0
   Convolutional-199          [-1, 512, 13, 13]               0
          Conv2d-200         [-1, 1024, 13, 13]       4,718,592
     BatchNorm2d-201         [-1, 1024, 13, 13]           2,048
       LeakyReLU-202         [-1, 1024, 13, 13]               0
   Convolutional-203         [-1, 1024, 13, 13]               0
        Residual-204         [-1, 1024, 13, 13]               0
          Conv2d-205          [-1, 512, 13, 13]         524,288
     BatchNorm2d-206          [-1, 512, 13, 13]           1,024
       LeakyReLU-207          [-1, 512, 13, 13]               0
   Convolutional-208          [-1, 512, 13, 13]               0
          Conv2d-209         [-1, 1024, 13, 13]       4,718,592
     BatchNorm2d-210         [-1, 1024, 13, 13]           2,048
       LeakyReLU-211         [-1, 1024, 13, 13]               0
   Convolutional-212         [-1, 1024, 13, 13]               0
        Residual-213         [-1, 1024, 13, 13]               0
          Conv2d-214          [-1, 512, 13, 13]         524,288
     BatchNorm2d-215          [-1, 512, 13, 13]           1,024
       LeakyReLU-216          [-1, 512, 13, 13]               0
   Convolutional-217          [-1, 512, 13, 13]               0
          Conv2d-218         [-1, 1024, 13, 13]       4,718,592
     BatchNorm2d-219         [-1, 1024, 13, 13]           2,048
       LeakyReLU-220         [-1, 1024, 13, 13]               0
   Convolutional-221         [-1, 1024, 13, 13]               0
        Residual-222         [-1, 1024, 13, 13]               0
          Conv2d-223          [-1, 512, 13, 13]         524,288
     BatchNorm2d-224          [-1, 512, 13, 13]           1,024
       LeakyReLU-225          [-1, 512, 13, 13]               0
   Convolutional-226          [-1, 512, 13, 13]               0
          Conv2d-227         [-1, 1024, 13, 13]       4,718,592
     BatchNorm2d-228         [-1, 1024, 13, 13]           2,048
       LeakyReLU-229         [-1, 1024, 13, 13]               0
   Convolutional-230         [-1, 1024, 13, 13]               0
        Residual-231         [-1, 1024, 13, 13]               0
         Darknet-232  [[-1, 256, 52, 52], [-1, 512, 26, 26], [-1, 1024, 13, 13]]               0
          Conv2d-233          [-1, 512, 15, 15]         524,288
     BatchNorm2d-234          [-1, 512, 15, 15]           1,024
       LeakyReLU-235          [-1, 512, 15, 15]               0
   Convolutional-236          [-1, 512, 15, 15]               0
          Conv2d-237         [-1, 1024, 15, 15]       4,718,592
     BatchNorm2d-238         [-1, 1024, 15, 15]           2,048
       LeakyReLU-239         [-1, 1024, 15, 15]               0
   Convolutional-240         [-1, 1024, 15, 15]               0
          Conv2d-241          [-1, 512, 17, 17]         524,288
     BatchNorm2d-242          [-1, 512, 17, 17]           1,024
       LeakyReLU-243          [-1, 512, 17, 17]               0
   Convolutional-244          [-1, 512, 17, 17]               0
          Conv2d-245         [-1, 1024, 17, 17]       4,718,592
     BatchNorm2d-246         [-1, 1024, 17, 17]           2,048
       LeakyReLU-247         [-1, 1024, 17, 17]               0
   Convolutional-248         [-1, 1024, 17, 17]               0
          Conv2d-249         [-1, 1024, 19, 19]       1,048,576
     BatchNorm2d-250         [-1, 1024, 19, 19]           2,048
       LeakyReLU-251         [-1, 1024, 19, 19]               0
   Convolutional-252         [-1, 1024, 19, 19]               0
           DBLx5-253         [-1, 1024, 13, 13]               0
          Conv2d-254          [-1, 512, 13, 13]         524,288
     BatchNorm2d-255          [-1, 512, 13, 13]           1,024
       LeakyReLU-256          [-1, 512, 13, 13]               0
   Convolutional-257          [-1, 512, 13, 13]               0
 ConvTranspose2d-258          [-1, 512, 26, 26]       1,049,088
          Conv2d-259          [-1, 512, 26, 26]       4,718,592
     BatchNorm2d-260          [-1, 512, 26, 26]           1,024
       LeakyReLU-261          [-1, 512, 26, 26]               0
   Convolutional-262          [-1, 512, 26, 26]               0
             FPN-263          [-1, 512, 26, 26]               0
          Conv2d-264          [-1, 256, 28, 28]         131,072
     BatchNorm2d-265          [-1, 256, 28, 28]             512
       LeakyReLU-266          [-1, 256, 28, 28]               0
   Convolutional-267          [-1, 256, 28, 28]               0
          Conv2d-268          [-1, 512, 28, 28]       1,179,648
     BatchNorm2d-269          [-1, 512, 28, 28]           1,024
       LeakyReLU-270          [-1, 512, 28, 28]               0
   Convolutional-271          [-1, 512, 28, 28]               0
          Conv2d-272          [-1, 256, 30, 30]         131,072
     BatchNorm2d-273          [-1, 256, 30, 30]             512
       LeakyReLU-274          [-1, 256, 30, 30]               0
   Convolutional-275          [-1, 256, 30, 30]               0
          Conv2d-276          [-1, 512, 30, 30]       1,179,648
     BatchNorm2d-277          [-1, 512, 30, 30]           1,024
       LeakyReLU-278          [-1, 512, 30, 30]               0
   Convolutional-279          [-1, 512, 30, 30]               0
          Conv2d-280          [-1, 512, 32, 32]         262,144
     BatchNorm2d-281          [-1, 512, 32, 32]           1,024
       LeakyReLU-282          [-1, 512, 32, 32]               0
   Convolutional-283          [-1, 512, 32, 32]               0
           DBLx5-284          [-1, 512, 26, 26]               0
          Conv2d-285          [-1, 256, 26, 26]         131,072
     BatchNorm2d-286          [-1, 256, 26, 26]             512
       LeakyReLU-287          [-1, 256, 26, 26]               0
   Convolutional-288          [-1, 256, 26, 26]               0
 ConvTranspose2d-289          [-1, 256, 52, 52]         262,400
          Conv2d-290          [-1, 256, 52, 52]       1,179,648
     BatchNorm2d-291          [-1, 256, 52, 52]             512
       LeakyReLU-292          [-1, 256, 52, 52]               0
   Convolutional-293          [-1, 256, 52, 52]               0
             FPN-294          [-1, 256, 52, 52]               0
          Conv2d-295          [-1, 512, 13, 13]         524,288
     BatchNorm2d-296          [-1, 512, 13, 13]           1,024
       LeakyReLU-297          [-1, 512, 13, 13]               0
   Convolutional-298          [-1, 512, 13, 13]               0
          Conv2d-299         [-1, 1024, 13, 13]       4,718,592
     BatchNorm2d-300         [-1, 1024, 13, 13]           2,048
       LeakyReLU-301         [-1, 1024, 13, 13]               0
   Convolutional-302         [-1, 1024, 13, 13]               0
          Conv2d-303           [-1, 18, 13, 13]          18,450
       Detection-304           [-1, 18, 13, 13]               0
          Conv2d-305          [-1, 256, 26, 26]         131,072
     BatchNorm2d-306          [-1, 256, 26, 26]             512
       LeakyReLU-307          [-1, 256, 26, 26]               0
   Convolutional-308          [-1, 256, 26, 26]               0
          Conv2d-309          [-1, 512, 26, 26]       1,179,648
     BatchNorm2d-310          [-1, 512, 26, 26]           1,024
       LeakyReLU-311          [-1, 512, 26, 26]               0
   Convolutional-312          [-1, 512, 26, 26]               0
          Conv2d-313           [-1, 18, 26, 26]           9,234
       Detection-314           [-1, 18, 26, 26]               0
          Conv2d-315          [-1, 128, 52, 52]          32,768
     BatchNorm2d-316          [-1, 128, 52, 52]             256
       LeakyReLU-317          [-1, 128, 52, 52]               0
   Convolutional-318          [-1, 128, 52, 52]               0
          Conv2d-319          [-1, 256, 52, 52]         294,912
     BatchNorm2d-320          [-1, 256, 52, 52]             512
       LeakyReLU-321          [-1, 256, 52, 52]               0
   Convolutional-322          [-1, 256, 52, 52]               0
          Conv2d-323           [-1, 18, 52, 52]           4,626
       Detection-324           [-1, 18, 52, 52]               0
================================================================
Total params: 69,802,262
Trainable params: 69,802,262
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 1.98
Forward/backward pass size (MB): 316329755939.75
Params size (MB): 266.27
Estimated Total Size (MB): 316329756208.00
--------------------------------------------------------------------------

我不明白错误是什么。不可能有这么大的模型。 错误:

OutOfMemoryError:CUDA 内存不足。尝试分配 170.00 MiB。 GPU 0 的总容量为 14.75 GiB,其中 7.06 MiB 可用。进程 5914 有 14.74 GiB 内存正在使用。在已分配的内存中,14.53 GiB 由 PyTorch 分配,88.94 MiB 由 PyTorch 保留但未分配。如果保留但未分配的内存很大,请尝试设置 PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True 以避免碎片。

我不明白为什么模型这么大。

python machine-learning deep-learning pytorch yolo
1个回答
0
投票

我尝试重现该错误并遇到了同样的问题。该问题来自 torchsummary,因此您可能需要到该项目那里提交一个 issue。

但是,我可以使用具有 8GB VRAM(批量大小为 1)的 GPU 来运行模型的前向和后向传递,没有任何问题。

如果您仍然遇到内存问题,您应该尝试减少批量大小。

我冒昧地清理了你的代码:

import torch
from torch import nn, Tensor


class Convolutional(nn.Module):
    """Conv2d (no bias) -> BatchNorm2d -> LeakyReLU(0.1) building block.

    Args:
        in_channels: Channel count of the input feature map.
        out_channels: Channel count produced by the convolution.
        kernel_size: Side length of the square convolution kernel.
        stride: Convolution stride.
        padding: Zero padding handed to the convolution; 0 by default, so
            3x3 callers that want to keep the spatial size pass ``padding=1``.
    """

    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1, padding: int = 0) -> None:
        super().__init__()

        conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
        )
        norm = nn.BatchNorm2d(num_features=out_channels)
        activation = nn.LeakyReLU(negative_slope=0.1)
        self.stack = nn.Sequential(conv, norm, activation)

    def forward(self, x: Tensor) -> Tensor:
        """Run ``x`` through convolution, batch normalisation and LeakyReLU."""
        return self.stack(x)


class Detection(nn.Module):
    """Detection head: 1x1 bottleneck, 3x3 expansion, 1x1 prediction conv.

    Args:
        in_channels: Channel count of the incoming feature map.
        C: Added once to the output channel count (presumably the number
            of classes -- confirm against the loss function).
        B: Multiplied by 5 in the output channel count (presumably boxes
            per grid cell -- confirm against the loss function).
    """

    def __init__(self, in_channels: int, C: int, B: int) -> None:
        super().__init__()

        hidden = in_channels // 2
        num_outputs = 5 * B + C
        layers = [
            Convolutional(in_channels, hidden, kernel_size=1),
            Convolutional(hidden, in_channels, kernel_size=3, padding=1),
            # Plain convolution (with bias, no norm/activation) emits the raw predictions.
            nn.Conv2d(in_channels, num_outputs, kernel_size=1),
        ]
        self.stack = nn.Sequential(*layers)

    def forward(self, x: Tensor) -> Tensor:
        """Return the prediction map for feature map ``x``."""
        predictions = self.stack(x)
        return predictions


class FPN(nn.Module):
    """Upsample a coarse feature map and fuse it with a skip connection.

    Args:
        in_channels: Channel count of the coarse input ``x``.
        out_channels: Channel count after the 1x1 reduction and of the
            result. The fusion convolution is built for ``2 * out_channels``
            inputs, so ``skip`` must also carry ``out_channels`` channels.
    """

    def __init__(self, in_channels: int, out_channels: int) -> None:
        super().__init__()

        self.conv1 = Convolutional(in_channels, out_channels, kernel_size=1)
        # Transposed convolution with kernel 2 / stride 2 doubles height and width.
        self.up = nn.ConvTranspose2d(out_channels, out_channels, kernel_size=2, stride=2)
        self.conv3 = Convolutional(out_channels + out_channels, out_channels, kernel_size=3, padding=1)

    def forward(self, x: Tensor, skip: Tensor) -> Tensor:
        """Reduce and upsample ``x``, concatenate with ``skip`` on dim 1, then fuse."""
        upsampled = self.up(self.conv1(x))
        merged = torch.cat((upsampled, skip), dim=1)
        fused = self.conv3(merged)
        return fused


class DBLx5(nn.Module):
    """Five stacked Convolutional units alternating 1x1 and 3x3 kernels.

    Channels go ``in -> in/2 -> in -> in/2 -> in -> in``. The 1x1 layers use
    no padding and the 3x3 layers use ``padding=1``.

    Args:
        in_channels: Channel count of the input; the output has the same count.
    """

    def __init__(self, in_channels: int) -> None:
        super().__init__()

        half = in_channels // 2
        layers = [
            Convolutional(in_channels, half, kernel_size=1),
            Convolutional(half, in_channels, kernel_size=3, padding=1),
            Convolutional(in_channels, half, kernel_size=1),
            Convolutional(half, in_channels, kernel_size=3, padding=1),
            Convolutional(in_channels, in_channels, kernel_size=1),
        ]
        self.stack = nn.Sequential(*layers)

    def forward(self, x: Tensor) -> Tensor:
        """Return the output of the five-layer stack applied to ``x``."""
        features = self.stack(x)
        return features


class Residual(nn.Module):
    """Residual unit: 1x1 bottleneck plus 3x3 expansion, added to the input.

    Args:
        in_channels: Channel count of the input; the output has the same
            count so that it can be summed with the input.
    """

    def __init__(self, in_channels: int) -> None:
        super().__init__()

        bottleneck = in_channels // 2
        squeeze = Convolutional(in_channels, bottleneck, kernel_size=1)
        expand = Convolutional(bottleneck, in_channels, kernel_size=3, padding=1)
        self.stack = nn.Sequential(squeeze, expand)

    def forward(self, x: Tensor) -> Tensor:
        """Return the two-layer branch applied to ``x``, plus ``x`` itself."""
        branch = self.stack(x)
        return branch + x


class Darknet(nn.Module):
    """Backbone made of three sequential stages whose outputs are all returned.

    Stage 1 ends at 256 channels, stage 2 at 512 and stage 3 at 1024; every
    stride-2 convolution halves the spatial resolution.
    """

    def __init__(self) -> None:
        super().__init__()

        # Stage 1: stem, then 1, 2 and 8 residual units at 64, 128 and 256 channels.
        stage1: list[nn.Module] = [
            Convolutional(3, 32, kernel_size=3, padding=1),
            Convolutional(32, 64, kernel_size=3, stride=2, padding=1),
            Residual(64),
            Convolutional(64, 128, kernel_size=3, stride=2, padding=1),
        ]
        stage1.extend(Residual(128) for _ in range(2))
        stage1.append(Convolutional(128, 256, kernel_size=3, stride=2, padding=1))
        stage1.extend(Residual(256) for _ in range(8))
        self.stack1 = nn.Sequential(*stage1)

        # Stage 2: downsample to 512 channels, then 8 residual units.
        stage2: list[nn.Module] = [Convolutional(256, 512, kernel_size=3, stride=2, padding=1)]
        stage2.extend(Residual(512) for _ in range(8))
        self.stack2 = nn.Sequential(*stage2)

        # Stage 3: downsample to 1024 channels, then 4 residual units.
        stage3: list[nn.Module] = [Convolutional(512, 1024, kernel_size=3, stride=2, padding=1)]
        stage3.extend(Residual(1024) for _ in range(4))
        self.stack3 = nn.Sequential(*stage3)

    def forward(self, x: Tensor) -> tuple[Tensor, Tensor, Tensor]:
        """Return the outputs of stage 1, stage 2 and stage 3, in that order."""
        stage1_out = self.stack1(x)
        stage2_out = self.stack2(stage1_out)
        stage3_out = self.stack3(stage2_out)
        return stage1_out, stage2_out, stage3_out


class YOLOv3(nn.Module):
    """Three-scale detector: Darknet backbone, two FPN merges, three heads.

    Args:
        C: Forwarded to every Detection head.
        B: Forwarded to every Detection head; defaults to 2.
    """

    def __init__(self, C: int, B: int = 2) -> None:
        super().__init__()

        self.darknet = Darknet()
        self.dbl5_1 = DBLx5(in_channels=1024)
        self.dbl5_2 = DBLx5(in_channels=512)

        self.detection_1 = Detection(in_channels=1024, C=C, B=B)
        self.detection_2 = Detection(in_channels=512, C=C, B=B)
        self.detection_3 = Detection(in_channels=256, C=C, B=B)

        self.fpn_1 = FPN(in_channels=1024, out_channels=512)
        self.fpn_2 = FPN(in_channels=512, out_channels=256)

    def forward(self, x: Tensor) -> tuple[Tensor, Tensor, Tensor]:
        """Return the prediction maps of the coarse, medium and fine scales."""
        # Backbone features carry 256, 512 and 1024 channels respectively.
        feat_256, feat_512, feat_1024 = self.darknet(x)

        coarse = self.dbl5_1(feat_1024)
        # Each FPN step upsamples the coarser map and fuses it with the
        # next backbone feature map.
        medium = self.dbl5_2(self.fpn_1(coarse, feat_512))
        fine = self.fpn_2(medium, feat_256)

        pred_coarse = self.detection_1(coarse)
        pred_medium = self.detection_2(medium)
        pred_fine = self.detection_3(fine)
        return pred_coarse, pred_medium, pred_fine
© www.soinside.com 2019 - 2024. All rights reserved.