std::thread::hardware_concurrency() 是 C++ 标准库中的函数,它返回当前硬件可以真正并发执行的线程数量(该值仅供参考;当无法获取这一信息时,函数返回 0)。
// Function object that folds one sub-range into a running total.
//
// Calling it adds every element of [first, last) onto the current value of
// `result` (using std::accumulate) and writes the sum back into `result`.
// The result is delivered through the reference parameter rather than a
// return value so the object can be used as a thread entry point.
template<typename Iterator, typename T>
struct accumulate_block
{
    auto operator()(Iterator first, Iterator last, T& result) -> void
    {
        // The incoming value of the output slot is the seed of the sum.
        T& running_total = result;
        running_total = std::accumulate(first, last, running_total);
    }
};
// Sums the range [first, last) onto `init`, splitting the work across threads.
//
// The range is cut into `num_threads` contiguous blocks. Every block except
// the last is summed on its own std::thread; the last block is summed on the
// calling thread. The per-block partial sums are then added to `init`.
//
// Parameters:
//   first, last - the input range; `last` must be reachable from `first`.
//   init        - initial value, added to the partial sums at the very end.
// Returns:
//   `init` plus the sum of all elements; `init` itself for an empty range.
//
// Notes:
//   * T must be default-constructible: each partial sum starts from T().
//   * Elements are grouped differently than in a sequential std::accumulate,
//     so the result can differ for non-associative additions (e.g. floating
//     point).
//   * If creating a thread, or summing the last block on the calling thread,
//     throws, the threads already started are joined before the exception
//     propagates. An exception thrown inside a worker thread still
//     terminates the program.
template<typename Iterator, typename T>
T Parallel_accumulate(Iterator first, Iterator last, T init)
{
    // Joins every still-joinable thread on scope exit, so an exception cannot
    // destroy a running std::thread (which would call std::terminate).
    struct thread_joiner
    {
        std::vector<std::thread>& workers;
        ~thread_joiner()
        {
            for (std::thread& worker : workers)
            {
                if (worker.joinable())
                    worker.join();
            }
        }
    };

    // Number of elements in the input range.
    unsigned long long const length =
        static_cast<unsigned long long>(std::distance(first, last));
    if (!length) // 1: empty range, nothing to add
        return init;

    // Smallest amount of work that is worth a thread of its own.
    unsigned long long const min_per_thread = 25;
    // Upper bound on the thread count implied by the amount of work (rounded up).
    unsigned long long const max_threads =
        (length + min_per_thread - 1) / min_per_thread; // 2
    unsigned long const hardware_threads =
        std::thread::hardware_concurrency();
    // Use the smaller of the hardware thread count and the work-based bound.
    // hardware_concurrency() may return 0 when the value is unknown; fall
    // back to 2 in that case.
    unsigned long const num_threads = static_cast<unsigned long>( // 3
        std::min<unsigned long long>(
            hardware_threads != 0 ? hardware_threads : 2, max_threads));
    // Elements handled by each worker; the last block also takes the remainder.
    unsigned long long const block_size = length / num_threads; // 4

    // One partial sum per block. Declared before the threads so that it
    // outlives them.
    std::vector<T> results(num_threads);
    // One thread fewer than num_threads is launched, because the calling
    // thread processes the final block itself.
    std::vector<std::thread> threads(num_threads - 1); // 5
    thread_joiner joiner{threads};

    // Adds the elements of [block_first, block_last) onto `result`.
    auto accumulate_range = [](Iterator block_first, Iterator block_last, T& result)
    {
        result = std::accumulate(block_first, block_last, result);
    };

    Iterator block_start = first;
    for (unsigned long i = 0; i < (num_threads - 1); ++i)
    {
        // Move block_end to one past the end of the current block.
        Iterator block_end = block_start;
        std::advance(block_end, block_size); // 6
        // Launch a thread that accumulates the current block into results[i].
        threads[i] = std::thread( // 7
            accumulate_range,
            block_start, block_end, std::ref(results[i]));
        // The next block starts where this one ended.
        block_start = block_end; // 8
    }
    // Process the final block (including any remainder) on the calling thread.
    accumulate_range(block_start, last, results[num_threads - 1]); // 9

    // Wait for all workers; mem_fn adapts thread::join into a callable.
    std::for_each(threads.begin(), threads.end(),
        std::mem_fn(&std::thread::join)); // 10
    // Combine the partial sums with the caller's initial value.
    return std::accumulate(results.begin(), results.end(), init); // 11
}
启动线程数量的确定:取"按计算量得出的最大线程数"与"硬件支持的线程数"两者中的较小值,作为实际启动的线程数量。
mem_fn
mem_fn 中的 mem 指类的成员(member),fn 指函数(function),合在一起就是 member function,即 std::mem_fn 是用来适配类的成员函数的:它把成员函数指针包装成函数对象,调用时把对象、对象引用或对象指针作为第一个参数传入。例如上面代码中的 std::mem_fn(&std::thread::join) 会对容器中的每个线程对象调用 join()。