How to compute the accuracy of a keypoint-detection CNN model in PyTorch?

Problem description · votes: 0 · answers: 2

Could anyone help me with this, please?

import numpy as np
import torch

def train_net(n_epochs):
    valid_loss_min = np.inf
    history = {'train_loss': [], 'valid_loss': [], 'epoch': []}

    for epoch in range(n_epochs):  
        train_loss = 0.0
        valid_loss = 0.0  
        net.train()
        for batch_i, data in enumerate(train_loader):
            images = data['image']
            key_pts = data['keypoints']
            key_pts = key_pts.view(key_pts.size(0), -1)
            key_pts = key_pts.type(torch.FloatTensor).to(device)
            images = images.type(torch.FloatTensor).to(device)
            output_pts = net(images)
            loss = criterion(output_pts, key_pts)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * images.size(0)
        net.eval() 

        with torch.no_grad():
            for batch_i, data in enumerate(test_loader):
                images = data['image']
                key_pts = data['keypoints']
                key_pts = key_pts.view(key_pts.size(0), -1)
                key_pts = key_pts.type(torch.FloatTensor).to(device)
                images = images.type(torch.FloatTensor).to(device)
                output_pts = net(images)
                loss = criterion(output_pts, key_pts)          
                valid_loss += loss.item() * images.size(0)
        train_loss = train_loss/len(train_loader.dataset)
        valid_loss = valid_loss/len(test_loader.dataset) 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(epoch + 1, train_loss, valid_loss))

        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,valid_loss))    
            torch.save(net,f'X:\\xxxx\\xxx\\xxx\\epoch{epoch + 1}_loss{valid_loss}.pth')
            valid_loss_min = valid_loss
        history['epoch'].append(epoch + 1)
        history['train_loss'].append(train_loss)
        history['valid_loss'].append(valid_loss)
    print('Finished Training')
    return history

Above is the training code for reference!
python deep-learning pytorch
2 Answers
1 vote

Funny, I was looking into this just a few minutes ago! As you have probably realized, simply computing the Euclidean distance between two sets of keypoints does not work well when you need to compare bodies of different shapes and sizes. So I would suggest using the Object Keypoint Similarity (OKS) score, which measures the distance between body joints, normalized by the scale of the person. As shown in this blog post, OKS is defined as:

OKS = sum_i [ exp(-d_i^2 / (2 * s^2 * k_i^2)) * δ(v_i > 0) ] / sum_i δ(v_i > 0)

where d_i is the Euclidean distance between detected and true keypoint i, v_i is the visibility flag of the true keypoint, s is the object scale (the square root of the object's area), and k_i is a per-keypoint constant that controls falloff.

Here (the function computeOks, at line 313) is Facebook Research's implementation:

def computeOks(self, imgId, catId):
    p = self.params
    # dimension here should be N x m
    gts = self._gts[imgId, catId]
    dts = self._dts[imgId, catId]
    inds = np.argsort([-d['score'] for d in dts], kind='mergesort')
    dts = [dts[i] for i in inds]
    if len(dts) > p.maxDets[-1]:
        dts = dts[0:p.maxDets[-1]]
    # if len(gts) == 0 and len(dts) == 0:
    if len(gts) == 0 or len(dts) == 0:
        return []
    ious = np.zeros((len(dts), len(gts)))
    # per-keypoint standard deviations tuned on the COCO dataset
    sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0
    vars = (sigmas * 2)**2
    k = len(sigmas)
    # compute oks between each detection and ground truth object
    for j, gt in enumerate(gts):
        # create bounds for ignore regions(double the gt bbox)
        g = np.array(gt['keypoints'])
        xg = g[0::3]; yg = g[1::3]; vg = g[2::3]
        k1 = np.count_nonzero(vg > 0)
        bb = gt['bbox']
        x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2
        y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2
        for i, dt in enumerate(dts):
            d = np.array(dt['keypoints'])
            xd = d[0::3]; yd = d[1::3]
            if k1>0:
                # measure the per-keypoint distance if keypoints visible
                dx = xd - xg
                dy = yd - yg
            else:
                # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
                z = np.zeros((k))
                dx = np.max((z, x0-xd), axis=0) + np.max((z, xd-x1), axis=0)
                dy = np.max((z, y0-yd), axis=0) + np.max((z, yd-y1), axis=0)
            e = (dx**2 + dy**2) / vars / (gt['area'] + np.spacing(1)) / 2
            if k1 > 0:
                e=e[vg > 0]
            ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
    return ious
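
To turn this into a quick per-epoch metric for a regression model like the one in the question, here is a minimal sketch of the same OKS computation reduced to a single object with every keypoint visible. It is not the pycocotools API: the function name oks, the assumption that pred and gt are (K, 2) NumPy arrays of (x, y) coordinates, and the use of the bounding-box area as the scale are all mine.

import numpy as np

# The 17 per-keypoint constants used for COCO; if your model predicts a
# different set of keypoints, these must be replaced (an assumption, not
# something the question prescribes).
COCO_SIGMAS = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72,
                        .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0

def oks(pred, gt, area, sigmas=COCO_SIGMAS):
    # squared Euclidean distance between each predicted and true keypoint
    d2 = np.sum((pred - gt) ** 2, axis=-1)
    # same normalization as computeOks above: (2*sigma)^2 per keypoint,
    # divided by twice the object scale
    e = d2 / ((2 * sigmas) ** 2) / (2 * (area + np.spacing(1)))
    return np.mean(np.exp(-e))

In the validation loop of the question you could reshape output_pts and key_pts back to (batch, K, 2), call oks per sample, and report either the mean OKS or the fraction of samples whose OKS exceeds a threshold such as 0.5 as an "accuracy".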

0 votes

Maybe use the Euclidean distance. True keypoint: (x, y); predicted keypoint: (x_, y_); distance d = sqrt((x_ - x)^2 + (y_ - y)^2). From this you have to derive a percentage. If d == 0, your accuracy for that keypoint is 100%. But what counts as 0%? I would say the distance from the true keypoint to the farthest image corner; call that distance R. The accuracy for your point is then 1 - d/R (100% at d = 0, 0% at d = R). Do this for every keypoint and take the average. I just came up with this, so it may have some flaws, but I think you can work with it and check whether it is the right solution for you.
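
Here is a minimal sketch of that idea, assuming pred and gt are (K, 2) NumPy arrays of (x, y) keypoints and the image dimensions are known; the function name keypoint_accuracy is illustrative, not from any library.

import numpy as np

def keypoint_accuracy(pred, gt, width, height):
    # the four image corners
    corners = np.array([[0, 0], [width, 0], [0, height], [width, height]], dtype=float)
    # d: distance from each true keypoint to its prediction
    d = np.linalg.norm(pred - gt, axis=-1)
    # R: distance from each true keypoint to its farthest image corner
    R = np.max(np.linalg.norm(gt[:, None, :] - corners[None, :, :], axis=-1), axis=1)
    # 1 - d/R is 100% for a perfect prediction, 0% for the worst possible one
    return np.mean(1.0 - d / R)

For a whole validation set you would average this over all samples, analogously to how valid_loss is accumulated in the question's code.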
