I am training the FabMap algorithm for loop-closure detection in my project. Training involves creating the descriptors, the vocabulary and the Chow-Liu tree. I have a database of more than 10,000 images. I am working on a fairly powerful desktop (12 hyper-threaded cores, 32 GB of RAM and a 6 GB Nvidia graphics card) and I would like to make full use of it while training my system. I am using OpenCV 3.0 with TBB enabled on Windows 7 64-bit.
The problem is that only the descriptor extraction is multi-threaded. The clustering and the construction of the Chow-Liu tree run in a single thread. The cluster() method of the BOWMSCTrainer class has three nested for() loops, each depending on the previous one, and even the sizes of the nested loops are only known at runtime. This is the core of the cluster() method:
//_descriptors is a Matrix wherein each row is a descriptor
Mat icovar = Mat::eye(_descriptors.cols,_descriptors.cols,_descriptors.type());

std::vector<Mat> initialCentres;
initialCentres.push_back(_descriptors.row(0));
for (int i = 1; i < _descriptors.rows; i++) {
    double minDist = DBL_MAX;
    for (size_t j = 0; j < initialCentres.size(); j++) {
        minDist = std::min(minDist,
            cv::Mahalanobis(_descriptors.row(i),initialCentres[j],icovar));
    }
    if (minDist > clusterSize)
        initialCentres.push_back(_descriptors.row(i));
}

std::vector<std::list<cv::Mat> > clusters;
clusters.resize(initialCentres.size());
for (int i = 0; i < _descriptors.rows; i++) {
    int index = 0; double dist = 0, minDist = DBL_MAX;
    for (size_t j = 0; j < initialCentres.size(); j++) {
        dist = cv::Mahalanobis(_descriptors.row(i),initialCentres[j],icovar);
        if (dist < minDist) {
            minDist = dist;
            index = (int)j;
        }
    }
    clusters[index].push_back(_descriptors.row(i));
}

// TODO: throw away small clusters.
Mat vocabulary;
Mat centre = Mat::zeros(1,_descriptors.cols,_descriptors.type());
for (size_t i = 0; i < clusters.size(); i++) {
    centre.setTo(0);
    for (std::list<cv::Mat>::iterator Ci = clusters[i].begin(); Ci != clusters[i].end(); Ci++) {
        centre += *Ci;
    }
    centre /= (double)clusters[i].size();
    vocabulary.push_back(centre);
}

return vocabulary;
}
To get an idea of how long training would take, I down-sampled the database. I started with only 10 images (~20,000 descriptors), which took about 40 minutes. For a sample of 100 images (~300,000 descriptors) the whole process took around 60 hours, and I fear that 1,000 images (which would finally give a decent vocabulary) could take about 8 months (if the method is O(n²), then 60 hours × 10² = 6,000 hours ≈ 8 months). I don't even want to imagine how long the whole database would take.
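As a rough sanity check on that quadratic assumption (using only the figures above, so treat it as an order-of-magnitude estimate):

\frac{300\,000}{20\,000} = 15, \qquad 15^{2} = 225, \qquad 40\ \text{min} \times 225 \approx 150\ \text{h},

which is the same order of magnitude as the ~60 hours I actually measured, so the O(n²) extrapolation above does not seem far-fetched.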
So, my question is: can the cluster() method be executed in parallel somehow, so that training the system does not take such an enormous amount of time? I have thought about applying OpenMP pragmas (something along the lines of the untested sketch below), or about spawning one thread per loop, but given the dynamic nature of the for() loops I am not sure that can work. Although I am familiar with parallel programming and multithreading, I am by no means an expert in this field.
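This is roughly what I had in mind for the second loop (assigning each descriptor to its closest centre), since its iterations look independent once the centres are fixed. It is an untested sketch, assuming an OpenMP-enabled compiler, and I am not sure the concurrent push_back is handled correctly:

// Untested sketch: parallelise the assignment of descriptors to the fixed centres.
// Each iteration only reads _descriptors and initialCentres; the shared
// clusters vector is only touched inside the critical section.
#pragma omp parallel for
for (int i = 0; i < _descriptors.rows; i++) {
    int index = 0;
    double minDist = DBL_MAX;
    for (size_t j = 0; j < initialCentres.size(); j++) {
        double dist = cv::Mahalanobis(_descriptors.row(i), initialCentres[j], icovar);
        if (dist < minDist) {
            minDist = dist;
            index = (int)j;
        }
    }
    #pragma omp critical
    clusters[index].push_back(_descriptors.row(i));
}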
Many thanks in advance!
For what it's worth, I leave here the code I wrote using OpenCV's parallel_for_ call. I also added a feature to the code: it now discards every cluster smaller than a threshold. This code effectively speeds up the process:
//The first nest of fors remains untouched, but the following ones:
std::vector<std::list<cv::Mat> > clusters;
clusters.resize(initialCentres.size());
Mutex lock;
// Note: cv::Range is end-exclusive, so the full range is [0, rows)
parallel_for_(cv::Range(0, _descriptors.rows),
    for_createClusters(clusters, initialCentres, icovar, _descriptors, lock));

Mat vocabulary;
Mat centre = Mat::zeros(1,_descriptors.cols,_descriptors.type());
parallel_for_(cv::Range(0, (int)clusters.size()),
    for_estimateCentres(clusters, vocabulary, centre, minSize, lock));
And, in the header:
//parallel_for_ for creating clusters:
class CV_EXPORTS for_createClusters: public ParallelLoopBody {
private:
    std::vector<std::list<cv::Mat> >& bufferCluster;
    const std::vector<Mat> initCentres;
    const Mat icovar;
    const Mat descriptorsParallel;
    Mutex& lock_for;
public:
    for_createClusters(std::vector<std::list<cv::Mat> >& _buffCl,
            const std::vector<Mat> _initCentres, const Mat _icovar,
            const Mat _descriptors, Mutex& _lock_for)
        : bufferCluster(_buffCl), initCentres(_initCentres), icovar(_icovar),
          descriptorsParallel(_descriptors), lock_for(_lock_for) {}

    virtual void operator()( const cv::Range &r ) const
    {
        for (int f = r.start; f != r.end; ++f)
        {
            int index = 0; double dist = 0, minDist = DBL_MAX;
            for (size_t j = 0; j < initCentres.size(); j++) {
                dist = cv::Mahalanobis(descriptorsParallel.row(f),
                                       initCentres[j], icovar);
                if (dist < minDist) {
                    minDist = dist;
                    index = (int)j;
                }
            }
            {
                // AutoLock Lock(lock_for);
                lock_for.lock();
                bufferCluster[index].push_back(descriptorsParallel.row(f));
                lock_for.unlock();
            }
        }
    }
};
class CV_EXPORTS for_estimateCentres: public ParallelLoopBody {
private:
    const std::vector<std::list<cv::Mat> > bufferCluster;
    Mat& vocabulary;
    const Mat centre;
    const int minSizCl;
    Mutex& lock_for;
public:
    for_estimateCentres(const std::vector<std::list<cv::Mat> > _bufferCluster,
            Mat& _vocabulary, const Mat _centre, const int _minSizCl, Mutex& _lock_for)
        : bufferCluster(_bufferCluster), vocabulary(_vocabulary),
          centre(_centre), minSizCl(_minSizCl), lock_for(_lock_for) {}

    virtual void operator()( const cv::Range &r ) const
    {
        Mat ctr = Mat::zeros(1, centre.cols, centre.type());
        for (int f = r.start; f != r.end; ++f) {
            ctr.setTo(0);
            //Not taking into account small clusters
            if (bufferCluster[f].size() >= (size_t) minSizCl)
            {
                for (std::list<cv::Mat>::const_iterator Ci = bufferCluster[f].begin();
                     Ci != bufferCluster[f].end(); Ci++)
                    ctr += *Ci;
                ctr /= (double)bufferCluster[f].size();
                {
                    // AutoLock Lock(lock_for);
                    lock_for.lock();
                    vocabulary.push_back(ctr);
                    lock_for.unlock();
                }
            }
        }
    }
};
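By the way, the commented-out AutoLock lines are an alternative to the manual lock()/unlock() pairs: cv::AutoLock releases the mutex automatically when it goes out of scope, so the lock cannot be leaked if something throws inside the critical section. A minimal sketch of that variant, using the same names as above:

{
    // RAII locking: the mutex is acquired here and released automatically
    // when Lock goes out of scope at the closing brace.
    cv::AutoLock Lock(lock_for);
    bufferCluster[index].push_back(descriptorsParallel.row(f));
}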
Hope this helps someone...
/*//////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this
// license. If you do not agree to this license, do not download, install,
// copy or use the software.
//
// This file originates from the openFABMAP project:
// [http://code.google.com/p/openfabmap/] -or-
// [https://github.com/arrenglover/openfabmap]
//
// For published work which uses all or part of OpenFABMAP, please cite:
// [http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=6224843]
//
// Original Algorithm by Mark Cummins and Paul Newman:
// [http://ijr.sagepub.com/content/27/6/647.short]
// [http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=5613942]
// [http://ijr.sagepub.com/content/30/9/1100.abstract]
//
// License Agreement
//
// Copyright (C) 2012 Arren Glover [[email protected]] and
// Will Maddern [[email protected]], all rights reserved.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote
// products derived from this software without specific prior written
//    permission.
//
// This software is provided by the copyright holders and contributors "as is"
// and any express or implied warranties, including, but not limited to, the
// implied warranties of merchantability and fitness for a particular purpose
// are disclaimed. In no event shall the Intel Corporation or contributors be
// liable for any direct, indirect, incidental, special, exemplary, or
// consequential damages (including, but not limited to, procurement of
// substitute goods or services; loss of use, data, or profits; or business
// interruption) however caused and on any theory of liability, whether in
// contract, strict liability,or tort (including negligence or otherwise)
// arising in any way out of the use of this software, even if advised of the
// possibility of such damage.
//////////////////////////////////////////////////////////////////////////////*/
#include "bowmsctrainer.hpp"
#include <iostream>
#include <vector>
#include <list>
namespace of2 {
BOWMSCTrainer::BOWMSCTrainer(double _clusterSize) :
    clusterSize(_clusterSize) {
}

BOWMSCTrainer::~BOWMSCTrainer() {
}

cv::Mat BOWMSCTrainer::cluster() const {
    CV_Assert(!descriptors.empty());

    int descCount = 0;
    for(size_t i = 0; i < descriptors.size(); i++)
        descCount += descriptors[i].rows;

    cv::Mat mergedDescriptors(descCount, descriptors[0].cols,
        descriptors[0].type());
    for(size_t i = 0, start = 0; i < descriptors.size(); i++)
    {
        cv::Mat submut = mergedDescriptors.rowRange((int)start,
            (int)(start + descriptors[i].rows));
        descriptors[i].copyTo(submut);
        start += descriptors[i].rows;
    }
    return cluster(mergedDescriptors);
}
cv::Mat BOWMSCTrainer::cluster(const cv::Mat& descriptors) const {
    CV_Assert(!descriptors.empty());
    // TODO: sort the descriptors before clustering.

    // Start timing
    int64 start_time = cv::getTickCount();

    // Used for Mahalanobis distance calculation, identity covariance
    cv::Mat icovar = cv::Mat::eye(descriptors.cols,descriptors.cols,descriptors.type());

    // Create initial centres guaranteeing a centre distance < minDist //
    // Loop through all the descriptors
    std::vector<cv::Mat> initialCentres;
    initialCentres.push_back(descriptors.row(0));
    for (int i = 1; i < descriptors.rows; i++)
    {
        double minDist = DBL_MAX;
#pragma omp parallel for if (initialCentres.size() > 100)
        for (int j = 0; j < (int)initialCentres.size(); j++)
        {
            // Our covariance is identity, just use the norm, it's faster.
            // cv::Mahalanobis(descriptors.row(i),initialCentres[j], icovar);
            double myDist = cv::norm(descriptors.row(i),initialCentres[j]);
#pragma omp critical
            minDist = std::min(minDist, myDist);
        }
        // Add new cluster if outside of range
        if (minDist > clusterSize)
            initialCentres.push_back(descriptors.row(i));
        // Status
        if ((i-1)%(descriptors.rows/10) == 0)
            std::cout << "." << std::flush;
    }

    // Status
    std::cout << "\nFinished initial clustering for "
              << descriptors.rows << " descriptors. "
              << initialCentres.size() << " initial clusters. "
              << std::endl;

    // Assign each descriptor to its closest centre //
    // Loop through all the descriptors again
    // TODO: Consider a kd-tree for this search
    std::vector<std::list<cv::Mat> > clusters;
    clusters.resize(initialCentres.size());
#pragma omp parallel for schedule(dynamic, 200)
    for (int i = 0; i < descriptors.rows; i++) {
        size_t index = 0; double dist, minDist = DBL_MAX;
        for (size_t j = 0; j < initialCentres.size(); j++) {
            dist = cv::norm(descriptors.row(i),initialCentres[j]);
            if (dist < minDist) {
                minDist = dist;
                index = j;
            }
        }
#pragma omp critical // Order doesn't matter here
        clusters[index].push_back(descriptors.row(i));
        // Status (could be off because of parallelism, but a guess)
        if ((i-1)%(descriptors.rows/10) == 0)
            std::cout << "." << std::flush;
    }

    // Status
    std::cout << "\nFinished re-assignment. "
              << std::endl;

    // Calculate the centre mean for each cluster //
    // Loop through all the clusters
    cv::Mat vocabulary;
#pragma omp parallel for schedule(static, 1) ordered
    for (int i = 0; i < (int)clusters.size(); i++) {
        // TODO: Throw away small clusters
        // TODO: Make this configurable
        // TODO: Re-assign?
        // if (clusters[i].size() < 3) continue;
        cv::Mat centre = cv::Mat::zeros(1,descriptors.cols,descriptors.type());
        for (std::list<cv::Mat>::iterator Ci = clusters[i].begin(); Ci != clusters[i].end(); Ci++) {
            centre += *Ci;
        }
        centre /= (double)clusters[i].size();
#pragma omp ordered // Ordered so it's identical to non omp.
        vocabulary.push_back(centre);
        // Status (could be off because of parallelism, but a guess)
        if ((i-1)%(clusters.size()/10) == 0)
            std::cout << "." << std::flush;
    }

    // Finish timing
    int64 end_time = cv::getTickCount();

    // Status
    std::cout << "\nFinished finding the mean. "
              << vocabulary.rows << " words. "
              << (end_time-start_time)/cv::getTickFrequency() << " s. "
              << std::endl;

    return vocabulary;
}
}
Hi, could you help me edit this code so that your solution works for me as well?
Thanks in advance. I look forward to your reply.