为什么 std::execution::par 搭配 std::views::iota 的迭代器时不会启动多个线程

Question

这里有两个版本的代码。一个在 std::for_each 调用中使用 std::vector 的迭代器，另一个使用 std::views::iota 的迭代器。

我希望这两个都使用多线程，因为我正在使用

std::execution::par

，但情况似乎并非如此。

iota

版本仅使用单个线程，我不明白为什么。

vector 版本

#include <algorithm>
#include <execution>
#include <iostream>
#include <mutex>
#include <ranges>
#include <thread>
#include <vector>

// Demo (vector version): copies the values 0..7 from an iota view into a
// std::vector<int>, then visits the vector with std::for_each under
// std::execution::par, printing the id of the thread that handles each value.
int main()
{
   std::mutex io_mutex;
   auto offsets0_ = std::views::iota(0, 8);
   // Materialise the lazy view so that std::for_each receives vector iterators.
   auto offsets0 = std::vector<int>(std::begin(offsets0_), std::end(offsets0_));
   std::for_each(
      std::execution::par,
      std::begin(offsets0), std::end(offsets0),
      [&](auto offset0) {
         // RAII guard instead of manual lock()/unlock(): the mutex is released
         // on every exit path, including an exception from the stream write.
         // It is held for the print and the sleep, exactly as before.
         const std::lock_guard<std::mutex> guard(io_mutex);
         std::cout << std::this_thread::get_id() << ": " << offset0 << '\n';

         // Emulate doing a bunch of work, otherwise threads don't spawn in either scenario
         std::this_thread::sleep_for(std::chrono::milliseconds(200));
      });
}

输出

139803706330688: 0
139803706330688: 1
139803695208000: 6
139803691009600: 5
139803699406400: 4
139803706330688: 2
139803695208000: 7
139803691009600: 3

所以它使用了 4 个线程来遍历所有项目。我的电脑有 4 个核心，所以这是有道理的。

Iota范围版本

#include <algorithm>
#include <execution>
#include <iostream>
#include <mutex>
#include <ranges>
#include <thread>
#include <vector>

// Demo (iota version): passes the iterators of a std::views::iota(0, 8) view
// directly to std::for_each under std::execution::par and prints the id of the
// thread that handles each value.
int main()
{
   std::mutex io_mutex;
   auto offsets0 = std::views::iota(0, 8);
   std::for_each(
      std::execution::par,
      std::begin(offsets0), std::end(offsets0),
      [&](auto offset0) {
         // RAII guard instead of manual lock()/unlock(): the mutex is released
         // on every exit path, including an exception from the stream write.
         // It is held for the print and the sleep, exactly as before.
         const std::lock_guard<std::mutex> guard(io_mutex);
         std::cout << std::this_thread::get_id() << ": " << offset0 << '\n';

         // Emulate doing a bunch of work, otherwise threads don't spawn in either scenario
         std::this_thread::sleep_for(std::chrono::milliseconds(200));
      });
}

输出

139954983456320: 0
139954983456320: 1
139954983456320: 2
139954983456320: 3
139954983456320: 4
139954983456320: 5
139954983456320: 6
139954983456320: 7

使用iota范围迭代器它只有一个线程..为什么？范围不应该这样使用吗？

两者都是使用以下选项编译的：

g++ main.cpp -std=c++20 -ltbb -O3

。使用 gcc 11.1.0、glibc 2.33 和 tbb 2020.3。

Answer 1

根据 cppreference，这是带有执行策略参数的 for_each 签名。

template< class ExecutionPolicy, class ForwardIt, class UnaryFunction2 >
void for_each( ExecutionPolicy&& policy, ForwardIt first, ForwardIt last, UnaryFunction2 f );

其中

ForwardIt

应满足 LegacyForwardIterator 要求。

但是，iota_view::iterator 只有在其 value_type 满足“可递增（incrementable）”时才提供 iterator_category，而且该类别最高只到 LegacyInputIterator；根据我的理解，它永远不会满足 LegacyForwardIterator。（参见 cppreference）

这可能就是为什么

for_each

调用没有达到您预期的效果。

我不确定为什么这不仅仅是一个编译错误，也许它是故意的，尽管完全忽略执行策略确实感觉不寻常。我希望另一个答案可以阐明这一点。

Answer 2

查看 gcc 11 自带的标准库实现可以发现，它检查的是 std::iterator_traits<Iterator>::iterator_category 是否为 std::random_access_iterator_tag，而 iota 视图的迭代器无法通过这一检查。

// Illustrative fragment: inspects the iterator type of an iota view and then
// runs std::for_each over it with the par_unseq policy.
auto view = std::views::iota(0, 100);
using Iterator = decltype(view.begin());
// The legacy category tag reported through std::iterator_traits.
using IteratorCategory = std::iterator_traits<Iterator>::iterator_category;

// yes — these assertions are presented as holding: the iterator models the
// C++20 random_access_iterator concept, yet its legacy category tag is
// input_iterator_tag.
static_assert(std::random_access_iterator<Iterator>);
static_assert(std::is_same_v<IteratorCategory,std::input_iterator_tag>);

// no — these assertions are presented as failing: the legacy tag is not
// random_access_iterator_tag, so the PSTL-internal trait is false as well.
static_assert(std::is_same_v<IteratorCategory,std::random_access_iterator_tag>);
static_assert(typename __pstl::__internal::__is_random_access_iterator<Iterator>::type());

// Each element sleeps for (100 - i) microseconds before printing its value.
std::for_each(std::execution::par_unseq, view.begin(), view.end(), [](int i){
    usleep(100 - i);
    printf("%i\n", i);
});

https://godbolt.org/z/1Y9sTbcEq

来自：

// Execution-policy overload of for_each as quoted from the library sources.
// It forwards the policy, the iterator pair and the function to
// __pattern_walk1, together with two tag values computed from the policy and
// the iterator type: whether vectorization is preferred and whether
// parallelization is preferred.
template <class _ExecutionPolicy, class _ForwardIterator, class _Function>
__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
for_each(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f)
{
    __pstl::__internal::__pattern_walk1(
        std::forward<_ExecutionPolicy>(__exec), __first, __last, __f,
        __pstl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec),
        __pstl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec));
}

...

// Combines, via __lazy_and, the policy's __allow_parallel() result with the
// __is_random_access_iterator trait evaluated over the given iterator types.
// The return type is whatever __lazy_and yields for those two operands, so the
// result reflects both the policy and the iterators' category.
template <typename _ExecutionPolicy, typename... _IteratorTypes>
auto
__is_parallelization_preferred(_ExecutionPolicy&& __exec)
    -> decltype(__internal::__lazy_and(__exec.__allow_parallel(),
                                       typename __internal::__is_random_access_iterator<_IteratorTypes...>::type()))
{
    return __internal::__lazy_and(__exec.__allow_parallel(),
                                  typename __internal::__is_random_access_iterator<_IteratorTypes...>::type());
}

...

// Single-iterator specialization: true only when the iterator's
// std::iterator_traits category is *exactly* std::random_access_iterator_tag.
// The comparison is std::is_same, so any other tag type yields false.
template <typename _IteratorType>
struct __is_random_access_iterator<_IteratorType>
    : std::is_same<typename std::iterator_traits<_IteratorType>::iterator_category, std::random_access_iterator_tag>
{
};

为了进一步演示，下面是一个不太优雅的变通方案：自定义一个 iota_iterator，并把它的 iterator_category 声明为 std::random_access_iterator_tag：


/// Iterator over consecutive values of type T that advertises
/// std::random_access_iterator_tag as its legacy iterator category.
///
/// The iterator stores the current value itself; dereferencing returns that
/// value by copy, and all arithmetic/comparison operators act on the stored
/// value.
///
/// @tparam T value type; also used as the difference type.
template<class T>
struct iota_iterator {
    using value_type = T;
    using difference_type = T;
    using pointer = T*;
    // operator* returns a prvalue, so `reference` must be T rather than T&.
    // Adaptors that return iterator_traits::reference from their own
    // operator* (e.g. std::reverse_iterator before C++20) would otherwise try
    // to bind a T& to a temporary and fail to compile.
    using reference = T;
    using iterator_category = std::random_access_iterator_tag;

    // Copy/move operations are implicitly generated (Rule of Zero).
    iota_iterator() = default;
    // Intentionally implicit: callers build iterators with `return {value};`.
    iota_iterator(T value) : i(value) {}

    /// Returns the current value by copy.
    value_type operator*() const { return i; }
    /// Returns the value `d` positions after the current one.
    T operator[](difference_type d) const { return i + d; }

    iota_iterator& operator++() { ++i; return *this; }
    iota_iterator operator++(int) { iota_iterator t(*this); ++*this; return t; }
    iota_iterator& operator--() { --i; return *this; }
    iota_iterator operator--(int) { iota_iterator t(*this); --*this; return t; }

    // The explicit cast keeps these usable for T narrower than int, where
    // `i + d` is promoted and brace-initialisation would reject the narrowing.
    iota_iterator operator+(difference_type d) const { return iota_iterator(static_cast<T>(i + d)); }
    iota_iterator operator-(difference_type d) const { return iota_iterator(static_cast<T>(i - d)); }
    iota_iterator& operator+=(difference_type d) { i += d; return *this; }
    iota_iterator& operator-=(difference_type d) { i -= d; return *this; }
    friend iota_iterator operator+(difference_type n, const iota_iterator& it) {
        return it + n;
    }
    /// Distance between two iterators, i.e. the difference of their values.
    difference_type operator-(const iota_iterator& other) const { return i - other.i; }

    bool operator==(const iota_iterator& other) const { return i == other.i; }
    bool operator!=(const iota_iterator& other) const { return i != other.i; }
    bool operator<(const iota_iterator& other) const { return i < other.i; }
    bool operator<=(const iota_iterator& other) const { return i <= other.i; }
    bool operator>(const iota_iterator& other) const { return i > other.i; }
    bool operator>=(const iota_iterator& other) const { return i >= other.i; }
private:
    T i = 0;
};

/// Minimal aggregate range over iota_iterator<T>: begin() yields an iterator
/// holding m_begin and end() yields an iterator holding m_end.
template<class T>
struct iota_view {
    using iterator = iota_iterator<T>;

    T m_begin;
    T m_end;

    /// Iterator positioned at m_begin.
    iterator begin() const { return iterator(m_begin); }
    /// Iterator positioned at m_end.
    iterator end() const { return iterator(m_end); }
};

https://godbolt.org/z/EsxEh8f86

为什么 std::execution::par 不使用 std::views::iota 迭代器启动线程

问题描述投票：0回答：2

矢量版

输出

Iota范围版本

输出

2个回答

最新问题

为什么 std::execution::par 不使用 std::views::iota 迭代器启动线程

问题描述 投票：0回答：2

矢量版

输出

Iota范围版本

输出

2个回答

最新问题

问题描述投票：0回答：2