我有两个线程,一个正在
find()
上执行 tbb::concurrent_hash_map
,另一个正在遍历此地图而不进行任何插入或删除。奇怪的是,如果不执行find()
,迭代遍历是正常的,但是当执行find()
时,使用HashMap::iterator
迭代得到的数据量不一致。为什么会出现这种情况?
(待定版本2021.5.0-7ubuntu2。)
我添加了一个测试示例,包括两段代码。一个是我封装的地方
tbb::concurrent_hash_map
,另一个是测试示例。结果如下:
writeTime=0.367443
------------finish write---------------
hashMap size = 3174014
trav_cnt=3480029
error: trav_cnt=3480029 hashmap_size=3174014
-----readTime=2.19917
测试代码:
#include<map>
#include<string>
#include<vector>
#include<fstream>
#include<iostream>
#include <omp.h>
#include <tbb/concurrent_hash_map.h>
#include <tbb/tbb.h>
template <typename KeyType, typename ValueType>
class ConcurrentHashMap {
private:
typedef tbb::concurrent_hash_map<KeyType, ValueType> HashMap;
typedef typename HashMap::const_accessor HashMapConstAccessor;
typedef typename HashMap::accessor HashMapAccessor;
typedef typename HashMap::iterator HashMapIterator;
typedef typename HashMap::value_type HashMapValuePair;
public:
ConcurrentHashMap (ValueType default_value) : DEFAULT_VALUE(default_value) {
}
size_t size() {
return hashMap.size();
}
bool insert(const KeyType& key, const ValueType& value) {
HashMapAccessor accessor;
bool inserted = hashMap.insert(accessor, key);
if (inserted) {
accessor->second = value;
}
return inserted;
}
bool erase(const KeyType& key) {
return hashMap.erase(key);
}
bool find(const KeyType& key, ValueType& value) {
HashMapConstAccessor accessor;
if (hashMap.find(accessor, key)) {
value = accessor->second;
return true;
}
value = DEFAULT_VALUE;
return false;
}
void clear() {
hashMap.clear();
}
const ValueType& operator[](const KeyType& key) const {
HashMapConstAccessor accessor;
hashMap.find(accessor, key);
if (hashMap.find(accessor, key)) {
return accessor->second;
}
// accessor->second = DEFAULT_VALUE;
return DEFAULT_VALUE;
}
HashMapIterator begin() {
return hashMap.begin();
}
HashMapIterator end() {
return hashMap.end();
}
private:
HashMap hashMap;
ValueType DEFAULT_VALUE;
};
class A;
using HashMap = ConcurrentHashMap<int, A*>;
// using HashMap = ConcurrentUnorderedMap<int, A*>;
class A {
public:
A(int _a, int _b): a(_a), b(_b) {
}
void sub () {
}
int a = 1;
int b = 0;;
};
void test(int N, HashMap& hashMap) {
int thread_num = 16;
std::thread writer(
[&] () {
auto writeStartTime = std::chrono::high_resolution_clock::now();
#pragma omp parallel for num_threads(thread_num)
for (int i = 0; i < N; i++) {
hashMap.insert(i, new A(1, i));
}
auto writeEndTime = std::chrono::high_resolution_clock::now();
double writeTime = std::chrono::duration<double>(writeEndTime
- writeStartTime).count();
std::cout << "writeTime=" << writeTime << std::endl;
}
);
writer.join();
}
int random_uniform_int(const int min = 0, const int max = 1) {
unsigned seed = 2000;
static thread_local std::mt19937 generator(seed);
std::uniform_int_distribution<int> distribution(min, max);
return distribution(generator);
}
int main () {
// cmd: g++ test_con_hashmap.cpp -fopenmp -ltbb && ./a.out
int N = 3174014;
std::nullptr_t NULLPOINTER = nullptr;
HashMap hashMap(NULLPOINTER);
test(N, hashMap);
std::cout << "------------finish write---------------" << std::endl;
size_t hashmap_size = hashMap.size();
std::cout << "\n hashMap size = " << hashmap_size << std::endl;
{
int thread_num = 32;
std::thread reader(
[&] () {
std::this_thread::sleep_for(std::chrono::milliseconds(1));
auto readStartTime = std::chrono::high_resolution_clock::now();
for (int i = 0; i < 30; i++) {
#pragma omp parallel for num_threads(thread_num)
for (int i = 0; i < N; i++) {
A* value;
int randomKey = random_uniform_int(0, N - 1);
if(hashMap.find(randomKey, value)){
}
}
}
auto readEndTime = std::chrono::high_resolution_clock::now();
double readTime = std::chrono::duration<double>(readEndTime
- readStartTime).count();
std::cout << "-----readTime=" << readTime << std::endl;
}
);
size_t trav_cnt = 0;
for(auto iterator1 = hashMap.begin();
iterator1 != hashMap.end(); ++iterator1 ){
trav_cnt++;
}
std::cout << " trav_cnt=" << trav_cnt << std::endl;
if (trav_cnt != hashmap_size) {
std::cout << "\n error: trav_cnt=" << trav_cnt
<< " hashmap_size=" << hashmap_size
<< std::endl;
}
reader.join();
}
hashMap.clear();
return 0;
}
我想知道为什么
find()
的读操作会影响遍历操作?
不幸的是,TBB ConcurrentHashMap 上的迭代不是线程安全的:
本节中的所有成员函数只能串行执行。 在并发执行这些的情况下,行为是未定义的 具有其他(同时安全)方法的成员函数。
TBB并发_无序_map具有安全迭代,但缺乏
erase
。
Anton Malakhov 有一篇关于原因的有趣博客,但其中一篇被埋在 intel.com 内的某个地方。