I am training the FabMap algorithm for loop-closure detection in my project. Training involves creating the descriptors, the vocabulary and the Chow-Liu tree. I have a database of more than 10,000 images. I am working on a fairly powerful desktop (12 hyper-threaded cores, 32 GB of RAM and a 6 GB Nvidia graphics card) and I would like to make full use of it while training my system. I am using OpenCV 3.0 with TBB enabled on Windows 7 64-bit.
The problem is that only the descriptor extraction is multi-threaded. The clustering and the construction of the Chow-Liu tree run in a single thread. The cluster() method of the BOWMSCTrainer class has three nested for() loops, each depending on the previous one, and even the sizes of the nested loops are only known at runtime. This is the core of the cluster() method:
//_descriptors is a Matrix wherein each row is a descriptor
Mat icovar = Mat::eye(_descriptors.cols,_descriptors.cols,_descriptors.type());

std::vector<Mat> initialCentres;
initialCentres.push_back(_descriptors.row(0));
for (int i = 1; i < _descriptors.rows; i++) {
    double minDist = DBL_MAX;
    for (size_t j = 0; j < initialCentres.size(); j++) {
        minDist = std::min(minDist,
            cv::Mahalanobis(_descriptors.row(i),initialCentres[j],icovar));
    }
    if (minDist > clusterSize)
        initialCentres.push_back(_descriptors.row(i));
}

std::vector<std::list<cv::Mat> > clusters;
clusters.resize(initialCentres.size());
for (int i = 0; i < _descriptors.rows; i++) {
    int index = 0; double dist = 0, minDist = DBL_MAX;
    for (size_t j = 0; j < initialCentres.size(); j++) {
        dist = cv::Mahalanobis(_descriptors.row(i),initialCentres[j],icovar);
        if (dist < minDist) {
            minDist = dist;
            index = (int)j;
        }
    }
    clusters[index].push_back(_descriptors.row(i));
}

// TODO: throw away small clusters.
Mat vocabulary;
Mat centre = Mat::zeros(1,_descriptors.cols,_descriptors.type());
for (size_t i = 0; i < clusters.size(); i++) {
    centre.setTo(0);
    for (std::list<cv::Mat>::iterator Ci = clusters[i].begin(); Ci != clusters[i].end(); Ci++) {
        centre += *Ci;
    }
    centre /= (double)clusters[i].size();
    vocabulary.push_back(centre);
}

return vocabulary;
}
To get an idea of how long training would take, I down-sampled the database. I started with only 10 images (~20,000 descriptors), which took about 40 minutes. For a sample of 100 images (~300,000 descriptors) the whole process took around 60 hours, and I fear that 1,000 images (which would finally give a decent vocabulary) could take about 8 months (if the method is O(n²), then 60 hours × 10² = 6,000 hours ≈ 8 months). I don't even want to imagine how long the whole database would take.
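As a rough sanity check on that quadratic assumption (using only the figures above, so treat it as an order-of-magnitude estimate):

\frac{300\,000}{20\,000} = 15, \qquad 15^{2} = 225, \qquad 40\ \text{min} \times 225 \approx 150\ \text{h},

which is the same order of magnitude as the ~60 hours I actually measured, so the O(n²) extrapolation above does not seem far-fetched.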
So, my question is: can the cluster() method be executed in parallel somehow, so that training the system does not take such an enormous amount of time? I have thought about applying OpenMP pragmas (something along the lines of the untested sketch below), or about spawning one thread per loop, but given the dynamic nature of the for() loops I am not sure that can work. Although I am familiar with parallel programming and multithreading, I am by no means an expert in this field.
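This is roughly what I had in mind for the second loop (assigning each descriptor to its closest centre), since its iterations look independent once the centres are fixed. It is an untested sketch, assuming an OpenMP-enabled compiler, and I am not sure the concurrent push_back is handled correctly:

// Untested sketch: parallelise the assignment of descriptors to the fixed centres.
// Each iteration only reads _descriptors and initialCentres; the shared
// clusters vector is only touched inside the critical section.
#pragma omp parallel for
for (int i = 0; i < _descriptors.rows; i++) {
    int index = 0;
    double minDist = DBL_MAX;
    for (size_t j = 0; j < initialCentres.size(); j++) {
        double dist = cv::Mahalanobis(_descriptors.row(i), initialCentres[j], icovar);
        if (dist < minDist) {
            minDist = dist;
            index = (int)j;
        }
    }
    #pragma omp critical
    clusters[index].push_back(_descriptors.row(i));
}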
Many thanks in advance!
For what it's worth, I leave here the code I wrote using OpenCV's parallel_for_ call. I also added a feature to the code: it now discards every cluster smaller than a threshold. This code effectively speeds up the process:
//The first nest of fors remains untouched, but the following ones:
std::vector<std::list<cv::Mat> > clusters;
clusters.resize(initialCentres.size());
Mutex lock;
// Note: cv::Range is end-exclusive, so the full range is [0, rows)
parallel_for_(cv::Range(0, _descriptors.rows),
    for_createClusters(clusters, initialCentres, icovar, _descriptors, lock));

Mat vocabulary;
Mat centre = Mat::zeros(1,_descriptors.cols,_descriptors.type());
parallel_for_(cv::Range(0, (int)clusters.size()),
    for_estimateCentres(clusters, vocabulary, centre, minSize, lock));
And, in the header:
//parallel_for_ for creating clusters:
class CV_EXPORTS for_createClusters: public ParallelLoopBody {
private:
    std::vector<std::list<cv::Mat> >& bufferCluster;
    const std::vector<Mat> initCentres;
    const Mat icovar;
    const Mat descriptorsParallel;
    Mutex& lock_for;
public:
    for_createClusters(std::vector<std::list<cv::Mat> >& _buffCl,
            const std::vector<Mat> _initCentres, const Mat _icovar,
            const Mat _descriptors, Mutex& _lock_for)
        : bufferCluster(_buffCl), initCentres(_initCentres), icovar(_icovar),
          descriptorsParallel(_descriptors), lock_for(_lock_for) {}

    virtual void operator()( const cv::Range &r ) const
    {
        for (int f = r.start; f != r.end; ++f)
        {
            int index = 0; double dist = 0, minDist = DBL_MAX;
            for (size_t j = 0; j < initCentres.size(); j++) {
                dist = cv::Mahalanobis(descriptorsParallel.row(f),
                                       initCentres[j], icovar);
                if (dist < minDist) {
                    minDist = dist;
                    index = (int)j;
                }
            }
            {
                // AutoLock Lock(lock_for);
                lock_for.lock();
                bufferCluster[index].push_back(descriptorsParallel.row(f));
                lock_for.unlock();
            }
        }
    }
};
class CV_EXPORTS for_estimateCentres: public ParallelLoopBody {
private:
    const std::vector<std::list<cv::Mat> > bufferCluster;
    Mat& vocabulary;
    const Mat centre;
    const int minSizCl;
    Mutex& lock_for;
public:
    for_estimateCentres(const std::vector<std::list<cv::Mat> > _bufferCluster,
            Mat& _vocabulary, const Mat _centre, const int _minSizCl, Mutex& _lock_for)
        : bufferCluster(_bufferCluster), vocabulary(_vocabulary),
          centre(_centre), minSizCl(_minSizCl), lock_for(_lock_for) {}

    virtual void operator()( const cv::Range &r ) const
    {
        Mat ctr = Mat::zeros(1, centre.cols, centre.type());
        for (int f = r.start; f != r.end; ++f) {
            ctr.setTo(0);
            //Not taking into account small clusters
            if (bufferCluster[f].size() >= (size_t) minSizCl)
            {
                for (std::list<cv::Mat>::const_iterator Ci = bufferCluster[f].begin();
                     Ci != bufferCluster[f].end(); Ci++)
                    ctr += *Ci;
                ctr /= (double)bufferCluster[f].size();
                {
                    // AutoLock Lock(lock_for);
                    lock_for.lock();
                    vocabulary.push_back(ctr);
                    lock_for.unlock();
                }
            }
        }
    }
};
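By the way, the commented-out AutoLock lines are an alternative to the manual lock()/unlock() pairs: cv::AutoLock releases the mutex automatically when it goes out of scope, so the lock cannot be leaked if something throws inside the critical section. A minimal sketch of that variant, using the same names as above:

{
    // RAII locking: the mutex is acquired here and released automatically
    // when Lock goes out of scope at the closing brace.
    cv::AutoLock Lock(lock_for);
    bufferCluster[index].push_back(descriptorsParallel.row(f));
}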
Hope this helps someone...
/*//////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this
// license. If you do not agree to this license, do not download, install,
// copy or use the software.
//
// This file originates from the openFABMAP project:
// [http://code.google.com/p/openfabmap/] -or-
// [https://github.com/arrenglover/openfabmap]
//
// For published work which uses all or part of OpenFABMAP, please cite:
// [http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=6224843]
//
// Original Algorithm by Mark Cummins and Paul Newman:
// [http://ijr.sagepub.com/content/27/6/647.short]
// [http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=5613942]
// [http://ijr.sagepub.com/content/30/9/1100.abstract]
//
// License Agreement
//
// Copyright (C) 2012 Arren Glover [[email protected]] and
// Will Maddern [[email protected]], all rights reserved.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote
// products derived from this software without specific prior written
//    permission.
//
// This software is provided by the copyright holders and contributors "as is"
// and any express or implied warranties, including, but not limited to, the
// implied warranties of merchantability and fitness for a particular purpose
// are disclaimed. In no event shall the Intel Corporation or contributors be
// liable for any direct, indirect, incidental, special, exemplary, or
// consequential damages (including, but not limited to, procurement of
// substitute goods or services; loss of use, data, or profits; or business
// interruption) however caused and on any theory of liability, whether in
// contract, strict liability,or tort (including negligence or otherwise)
// arising in any way out of the use of this software, even if advised of the
// possibility of such damage.
//////////////////////////////////////////////////////////////////////////////*/
#include "bowmsctrainer.hpp"
#include <iostream>
#include <vector>
#include <list>
namespace of2 {
BOWMSCTrainer::BOWMSCTrainer(double _clusterSize) :
    clusterSize(_clusterSize) {
}

BOWMSCTrainer::~BOWMSCTrainer() {
}

cv::Mat BOWMSCTrainer::cluster() const {
    CV_Assert(!descriptors.empty());

    int descCount = 0;
    for(size_t i = 0; i < descriptors.size(); i++)
        descCount += descriptors[i].rows;

    cv::Mat mergedDescriptors(descCount, descriptors[0].cols,
        descriptors[0].type());
    for(size_t i = 0, start = 0; i < descriptors.size(); i++)
    {
        cv::Mat submut = mergedDescriptors.rowRange((int)start,
            (int)(start + descriptors[i].rows));
        descriptors[i].copyTo(submut);
        start += descriptors[i].rows;
    }
    return cluster(mergedDescriptors);
}
cv::Mat BOWMSCTrainer::cluster(const cv::Mat& descriptors) const {
    CV_Assert(!descriptors.empty());
    // TODO: sort the descriptors before clustering.

    // Start timing
    int64 start_time = cv::getTickCount();

    // Used for Mahalanobis distance calculation, identity covariance
    cv::Mat icovar = cv::Mat::eye(descriptors.cols,descriptors.cols,descriptors.type());

    // Create initial centres guaranteeing a centre distance < minDist //
    // Loop through all the descriptors
    std::vector<cv::Mat> initialCentres;
    initialCentres.push_back(descriptors.row(0));
    for (int i = 1; i < descriptors.rows; i++)
    {
        double minDist = DBL_MAX;
#pragma omp parallel for if (initialCentres.size() > 100)
        for (int j = 0; j < (int)initialCentres.size(); j++)
        {
            // Our covariance is identity, just use the norm, it's faster.
            // cv::Mahalanobis(descriptors.row(i),initialCentres[j], icovar);
            double myDist = cv::norm(descriptors.row(i),initialCentres[j]);
#pragma omp critical
            minDist = std::min(minDist, myDist);
        }
        // Add new cluster if outside of range
        if (minDist > clusterSize)
            initialCentres.push_back(descriptors.row(i));
        // Status
        if ((i-1)%(descriptors.rows/10) == 0)
            std::cout << "." << std::flush;
    }

    // Status
    std::cout << "\nFinished initial clustering for "
              << descriptors.rows << " descriptors. "
              << initialCentres.size() << " initial clusters. "
              << std::endl;

    // Assign each descriptor to its closest centre //
    // Loop through all the descriptors again
    // TODO: Consider a kd-tree for this search
    std::vector<std::list<cv::Mat> > clusters;
    clusters.resize(initialCentres.size());
#pragma omp parallel for schedule(dynamic, 200)
    for (int i = 0; i < descriptors.rows; i++) {
        size_t index = 0; double dist, minDist = DBL_MAX;
        for (size_t j = 0; j < initialCentres.size(); j++) {
            dist = cv::norm(descriptors.row(i),initialCentres[j]);
            if (dist < minDist) {
                minDist = dist;
                index = j;
            }
        }
#pragma omp critical // Order doesn't matter here
        clusters[index].push_back(descriptors.row(i));
        // Status (could be off because of parallelism, but a guess)
        if ((i-1)%(descriptors.rows/10) == 0)
            std::cout << "." << std::flush;
    }

    // Status
    std::cout << "\nFinished re-assignment. "
              << std::endl;

    // Calculate the centre mean for each cluster //
    // Loop through all the clusters
    cv::Mat vocabulary;
#pragma omp parallel for schedule(static, 1) ordered
    for (int i = 0; i < (int)clusters.size(); i++) {
        // TODO: Throw away small clusters
        // TODO: Make this configurable
        // TODO: Re-assign?
        // if (clusters[i].size() < 3) continue;
        cv::Mat centre = cv::Mat::zeros(1,descriptors.cols,descriptors.type());
        for (std::list<cv::Mat>::iterator Ci = clusters[i].begin(); Ci != clusters[i].end(); Ci++) {
            centre += *Ci;
        }
        centre /= (double)clusters[i].size();
#pragma omp ordered // Ordered so it's identical to non omp.
        vocabulary.push_back(centre);
        // Status (could be off because of parallelism, but a guess)
        if ((i-1)%(clusters.size()/10) == 0)
            std::cout << "." << std::flush;
    }

    // Finish timing
    int64 end_time = cv::getTickCount();

    // Status
    std::cout << "\nFinished finding the mean. "
              << vocabulary.rows << " words. "
              << (end_time-start_time)/cv::getTickFrequency() << " s. "
              << std::endl;

    return vocabulary;
}
}
Hi, could you help me edit this code so that your solution works for me as well?
Thanks in advance. I look forward to your reply.