parallel_accumulate


Too young的一个小练手……

#include <iostream>
#include <chrono>
#include <thread>
#include <vector>
#include <cstdlib>
#include <algorithm>
#include <numeric>

using namespace std;

// Benchmark data set: main() fills this with rand() values and then sums it.
vector<int> v;

// Sums the range [beg, ed) and stores the total in `ans`.
//
// The result is delivered through a reference rather than a return value so
// the function can be used directly as a std::thread entry point.
//
// T   - input iterator type.
// U   - type of the total; must be value-initializable to its additive
//       identity and support `U + element`.
// beg - first element of the range.
// ed  - one past the last element of the range.
// ans - receives the sum (overwritten, not added to).
template<typename T, typename U>
void MyAccuX(T beg, T ed, U& ans)
{
    // std::accumulate carries the running total in the type of its initial
    // value. Seeding with a literal `0` would force the whole sum through
    // `int`, overflowing for wide totals and truncating floating-point ones,
    // so seed with a value-initialized U instead.
    ans = std::accumulate(beg, ed, U());
}

// Sums the range [beg, ed) on several threads and returns `initial` plus
// that sum.
//
// The range is cut into contiguous chunks, one per worker thread; each worker
// accumulates its chunk into its own slot, and the slots are added to
// `initial` after all workers have been joined.
//
// T       - iterator type; it is copied into each worker and traversed more
//           than once, so it must be at least a forward iterator.
// T3      - type of the total; must be value-initializable to its additive
//           identity and support `+` with the element type and `+=` with
//           itself.
// beg     - first element of the range.
// ed      - one past the last element of the range.
// initial - starting value the partial sums are added to.
//
// Returns `initial` unchanged for an empty range.
template<typename T, typename T3>
T3 parallel_accu( T beg,  T ed, T3 initial)
{
    const std::size_t total = static_cast<std::size_t>(std::distance(beg, ed));
    if (total == 0)
    {
        return initial;
    }

    // hardware_concurrency() is only a hint and may report 0; never use more
    // workers than there are elements so that no chunk is empty.
    std::size_t workers = std::thread::hardware_concurrency();
    if (workers == 0)
    {
        workers = 1;
    }
    workers = std::min(workers, total);

    // The first `extra` chunks take one additional element so the remainder
    // of the division is not dropped.
    const std::size_t chunk = total / workers;
    const std::size_t extra = total % workers;

    std::vector<T3> partial(workers);
    std::vector<std::thread> pool;
    pool.reserve(workers);

    T first(beg);
    try
    {
        for (std::size_t i = 0; i < workers; ++i)
        {
            T last(first);
            std::advance(last, chunk + (i < extra ? 1 : 0));

            // `partial` is never resized, so the slot address stays valid for
            // the lifetime of the worker. Each worker sums in T3 to avoid
            // narrowing the running total.
            T3* slot = &partial[i];
            pool.emplace_back([first, last, slot] {
                *slot = std::accumulate(first, last, T3());
            });

            first = last;
        }
    }
    catch (...)
    {
        // A failed thread launch must not destroy joinable threads (that
        // would call std::terminate); wait for the ones already running.
        for (std::thread& worker : pool)
        {
            if (worker.joinable()) worker.join();
        }
        throw;
    }

    for (std::thread& worker : pool)
    {
        worker.join();
    }

    for (const T3& piece : partial)
    {
        initial += piece;
    }
    return initial;
}

// Benchmark driver: fills the global vector `v` with rand() values while
// printing progress, then times a summation of the whole vector and prints
// the sum followed by the elapsed time in seconds.
int main()
{
    // Number of elements to generate, and how many elements make up one
    // progress step (kCount / kStep == 100, i.e. one line per percent).
    const long kCount = 500000000;
    const long kStep = 5000000;

    std::cout << "Generating data" << std::endl;
    v.reserve(kCount);

    long produced = 0;
    while (produced < kCount)
    {
        if (produced % kStep == 0)
        {
            std::cout << produced / kStep << "%" << std::endl;
        }
        v.push_back(std::rand());
        ++produced;
    }

    std::cout << "begin" << std::endl;
    const auto started = std::chrono::high_resolution_clock::now();

    // Sequential baseline; printing the sum is part of the timed section.
    // Figure noted by the original author for this variant: 4.18088
    // (presumably seconds - unverified).
    const long long sum = std::accumulate(v.begin(), v.end(), 0LL);
    std::cout << sum << std::endl;
    // Multi-threaded variant, disabled; the original author noted 1.1092 for it:
    //cout << parallel_accu(v.begin(), v.end(), 0LL) << endl;

    const auto finished = std::chrono::high_resolution_clock::now();

    // Measured in microseconds, reported as fractional seconds.
    const auto elapsed_us =
        std::chrono::duration_cast<std::chrono::microseconds>(finished - started).count();
    std::cout << elapsed_us / 1000000.0 << std::endl;
}