// ================================================================== // Author: Jean-Michel RICHER // Email: jean-michel.richer@univ-angers.fr // Institution: LERIA, Faculty of Science // University of Angers, France // ================================================================== #include #include #include #include using namespace std; /** * Send vector contents to output stream * */ template ostream& operator<<( ostream& out, vector& v) { for (auto x : v) out << x << " "; return out; } /* ================================================================== * SUBPROGRAM * * vector& sequential_inclusive_scan( vector &x, * vector& y ); * * WHAT * * Perform inclusive scan in parallel with two buffers * * HOW * * We use a log2(n) algorithm, however, for the result to be correct * we need to add 1 to log2(n) if n > 2^log2(n). For example, if n, * the size of the input vector @param(in) is equal to 8, then * @var(log2n) = 3. But if n == 9, @var(log2n) should be set to 4. * * At each step we compute 2^(d-1) that we call @var(shift) where * @var(d) ranges from 1 to @var(log2n). * * We copy the data from the input vector @param(in) from 0 to * @var(shift)-1. * We add to the other elements of the input vector, the elements * that are shifted from @var(shift). * * * PARAMETERS * * @paramdef(vector&, in) : input vector of data * @paramdef(vector&, out) : vector used for calculations * * * RETURN VALUE * @return(vector&) reference to last modified vector * * ================================================================== */ vector& parallel_naive_inclusive_scan( vector &x, vector& y ) { vector &in = x; vector &out = y; int n = static_cast( x.size() ); // if use this, then we won't have the correct result if n // is not a power of 2 //int log2n = round( log2( n ) ); int log2n = 32 - __builtin_clz( n ) - 1; if (__builtin_popcount( n ) > 1) ++log2n; for (int d = 1; d <= log2n; ++d) { int shift = 1 << (d-1); // copy values under shift #pragma omp parallel for for (int i = 0; i < shift; ++i) { out[ i ] = in [ i ]; } // add values over shift #pragma omp parallel for for (int i = shift; i < n; ++i) { out[ i ] = in[ i ] + in[ i - shift ]; } swap( in, out ); } return in; } /* ================================================================== * SUBPROGRAM * * int main(int argc, char *argv[]) * * WHAT * * Main program * * PARAMETERS * * @paramdef(int,argc) : number of command line arguments * @paramdef(char **, argv) : command line arguments as C-strings * * ================================================================== */ int main(int argc, char *argv[]) { vector x1; vector x2; int n = 8; if (argc > 1) n = atoi( argv[1] ); x1.resize( n ); x2.resize( n ); //iota(x1.begin(), x1.end(), 1); for (auto i = x1.begin(); i != x1.end(); ++i) *i = 1; cout << "initial data=" << x1 << endl; vector& r = sequential_inclusive_scan( x1, x2 ); cout << "inclusive scan=" << r << endl; return EXIT_SUCCESS; }