3. C++ et le monde extérieur




3.3. Librairies dédiées au calcul scientifique

3.3.1. Complex<T> de la STL

Pour les calculs avec des nombres complexes on peut utiliser la STL qui dispose d'un type Complex<T> à instancier.

  1. #include <iostream>
  2. #include <complex>
  3. #include <cstdlib>
  4. #include <valarray>
  5. using namespace std;
  6.  
  7. typedef complex<double> Complex;
  8.  
  9. int main() {
  10.     Complex c2(2, 0);
  11.     Complex ci(0, 1);
  12.  
  13.     cout << ci << " + " << c2 << " = " << ci+c2 << '\n'
  14.          << ci << " * " << ci << " = " << ci*ci << '\n'
  15.          << ci << " + " << c2 << " / " << ci << " = " << ci+c2/ci << '\n'
  16.          << 1  << " / " << ci << " = " << 1./ci << '\n';
  17.  
  18.     cout << "norm of c2 = " << norm(c2) << endl;
  19.     cout << "norm of ci = " << norm(ci) << endl;
  20.      
  21.     cout << "real(c2) = " << real(c2) << endl;
  22.     cout << "imag(ci) = " << imag(ci) << endl;
  23.      
  24.      
  25.     valarray<complex<double> > v(10);
  26.     for (size_t i = 0; i <v.size(); ++i) {
  27.         v[i].real( static_cast<double>(rand() % 1000) / 1000);
  28.         v[i].imag( static_cast<double>(rand() % 1000) / 1000);
  29.     }
  30.    
  31.     for (auto x : v) { 
  32.         cout << x << endl;
  33.     }
  34.     complex<double> sc = v.sum();
  35.     cout << "===========" << endl;
  36.     cout << "sum = " << sc << endl;
  37.      
  38.     exit(EXIT_SUCCESS);
  39. }
  40.  
(0,1) + (2,0) = (2,1)
(0,1) * (0,1) = (-1,0)
(0,1) + (2,0) / (0,1) = (0,-1)
1 / (0,1) = (0,-1)
norm of c2 = 4
norm of ci = 1
real(c2) = 2
imag(ci) = 1
(0.383,0.886)
(0.777,0.915)
(0.793,0.335)
(0.386,0.492)
(0.649,0.421)
(0.362,0.027)
(0.69,0.059)
(0.763,0.926)
(0.54,0.426)
(0.172,0.736)
===========
sum = (5.515,5.223)

3.3.2. BLAS

BLAS (Basic Linear Algebra Subprograms) est une librairie de sous-programmes optimisés pour calcul matriciel :

BLAS Reference Card

Sous Ubuntu, installer les packages : libblas3 libblas-dev

Exemple produit de matrices avec double DGEMM (Double GEneral Matrix Matrix) :

$$ C ← α op(A) × op(B) + β C $$

$α$ et $β$ sont des réels et $op$ est soit l'identité, la transposée ou la transposée conjuguée.

  1. #include <iostream>
  2. #include <numeric>
  3. using namespace std;
  4. #include <cblas.h>
  5. #include <xmmintrin.h>
  6.  
  7. /*
  8. extern "C" void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
  9.                  const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
  10.                  const int K, const double alpha, const double *A,
  11.                  const int lda, const double *B, const int ldb,
  12.                  const double beta, double *C, const int ldc);
  13.  */
  14. int DIM = 1000;
  15. double *A;
  16. double *B;
  17. double *C;
  18.                  
  19. int main(int argc, char *argv[]) {
  20.     char TRANS = 'N';
  21.     double alpha = 1.0;
  22.     double beta = 0.0;
  23.  
  24.     srand(19702013);
  25.    
  26.     if (argc > 1) {
  27.         DIM = atoi(argv[1]);
  28.     }
  29.    
  30.     int size = DIM*DIM;
  31.    
  32.     // allocate
  33.     A = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
  34.     B = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
  35.     C = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
  36.    
  37.     // fill
  38.     for (int i=0; i<size; ++i) {
  39.         A[i] = (rand() % 131) * 0.01;
  40.         B[i] = (rand() % 131) * 0.01;
  41.     }
  42.    
  43.     cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, DIM, DIM, DIM,
  44.         alpha, A, DIM, B, DIM, beta, C, DIM);
  45.  
  46.     // check result
  47.     double sum = accumulate(&C[0], &C[size], 0.0);
  48.    
  49.     cout << "sum = " << sum << endl;
  50.    
  51.     _mm_free(A);
  52.     _mm_free(B);
  53.     _mm_free(C);
  54.     return 0;
  55. }
  56.  

3.3.3. LAPACK

LAPACK (or Linear Algebra PACKage) provides routines :

Sous Ubuntu installer les packages liblapack3 (dynamique) ou liblapack-dev (statique)

3.3.4. Intel MKL (Math Kernel Library)

Intel Math Kernel Library (Intel® MKL) includes a wealth of math processing routines to accelerate application performance and reduce development time. Intel® MKL includes highly vectorized and threaded Linear Algebra, Fast Fourier Transforms (FFT), Vector Math and Statistics functions. The easiest way to take advantage of all of that processing power is to use a carefully optimized computing math library. Even the best compiler can’t compete with the level of performance possible from a hand-optimized library. If your application already relies on the BLAS or LAPACK functionality, simply re-link with Intel® MKL to get better performance on Intel and compatible architectures.

Voir MKL Cookbook et MKL Link Advisor

3.3.4.a  Exemple produit de matrix avec double

On utilise encore DGEMM de MKL :

  1. /*
  2. source /opt/intel/bin/iccvars.sh intel64
  3. icpc -o mkl_matrix_product.exe mkl_matrix_product.cpp -O3 -msse2 -Wl,--start-group /opt/intel/mkl/lib/intel64/libmkl_intel_ilp64.a /opt/intel/mkl/lib/intel64/libmkl_core.a /opt/intel/mkl/lib/intel64/libmkl_sequential.a -Wl,--end-group -lpthread -lm
  4.  
  5. icpc -o mkl_matrix_product.exe mkl_matrix_product.cpp -O3 -msse2 -DMKL_ILP64 -openmp -Wl,--start-group /opt/intel/mkl/lib/intel64/libmkl_intel_ilp64.a /opt/intel/mkl/lib/intel64/libmkl_core.a /opt/intel/mkl/lib/intel64/libmkl_intel_thread.a -Wl,--end-group -lpthread -lm
  6.  
  7. */
  8. #include <iostream>
  9. #include <numeric>
  10. using namespace std;
  11. #include <mkl.h>
  12. #include <xmmintrin.h>
  13.  
  14. /*
  15. extern "C" void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
  16.                  const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
  17.                  const int K, const double alpha, const double *A,
  18.                  const int lda, const double *B, const int ldb,
  19.                  const double beta, double *C, const int ldc);
  20.  */
  21. int DIM = 1000;
  22. double *A;
  23. double *B;
  24. double *C;
  25.  
  26. // ----------------------------------------------
  27. // main function
  28. // ----------------------------------------------
  29. int main(int argc, char *argv[]) {
  30.     char TRANS = 'N';
  31.     double alpha = 1.0;
  32.     double beta = 0.0;
  33.  
  34.     srand(19702013);
  35.    
  36.     if (argc > 1) {
  37.         DIM = atoi(argv[1]);
  38.     }
  39.    
  40.     if (argc > 2) {
  41.         mkl_set_num_threads(atoi(argv[2]));
  42.     }
  43.    
  44.     int size = DIM*DIM;
  45.    
  46.     // allocate
  47.     A = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
  48.     B = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
  49.     C = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
  50.    
  51.     // fill
  52.     for (int i=0; i<size; ++i) {
  53.         A[i] = (rand() % 131) * 0.01;
  54.         B[i] = (rand() % 131) * 0.01;
  55.     }
  56.    
  57.     // ------------------------------------------
  58.     // perform product with MKL
  59.     // ------------------------------------------
  60.     cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, DIM, DIM, DIM,
  61.         alpha, A, DIM, B, DIM, beta, C, DIM);
  62.  
  63.  
  64.     // check result
  65.     double sum = accumulate(&C[0], &C[size], 0.0);
  66.    
  67.     cout << "sum = " << sum << endl;
  68.    
  69.     _mm_free(A);
  70.     _mm_free(B);
  71.     _mm_free(C);
  72.     return 0;
  73. }
  74.  

3.3.4.b  Exemple de valeurs propres

On utilise DGEEV

  1. #include <iostream>
  2. #include <numeric>
  3. using namespace std;
  4. #include <mkl.h>
  5. #include <mkl_lapack.h>
  6. #include <xmmintrin.h>
  7.  
  8. int DIM = 10;
  9. double *A;
  10. double *work;
  11.  
  12. int main(int argc, char *argv[]) {
  13.  
  14.     srand(19702013);
  15.    
  16.     if (argc > 1) {
  17.         DIM = atoi(argv[1]);
  18.     }
  19.    
  20.     if (argc > 2) {
  21.         mkl_set_num_threads(atoi(argv[2]));
  22.     }
  23.      
  24.     MKL_INT n, lda, lwork, ldvl, ldvr, info, idxi, idxj;
  25.     double *wr, *wi, *A, *work, *vl, *vr;
  26.     char jobvl, jobvr;
  27.    
  28.     n = DIM;
  29.     lda = DIM;
  30.     ldvl = 1;
  31.     ldvr = DIM;
  32.     lwork = 4*DIM;
  33.     jobvl = 'N'; // or 'V'
  34.     jobvr = 'N'; // or 'V'
  35.    
  36.     int size = DIM*DIM;
  37.    
  38.     A = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
  39.     wr = static_cast<double *>(_mm_malloc(DIM * sizeof(double), 16));
  40.     wi = static_cast<double *>(_mm_malloc(DIM * sizeof(double), 16));
  41.     vl = static_cast<double *>(_mm_malloc(DIM * ldvl * sizeof(double), 16));
  42.     vr = static_cast<double *>(_mm_malloc(DIM * ldvr * sizeof(double), 16));
  43.     work = static_cast<double *>(_mm_malloc(lwork * sizeof(double), 16));
  44.  
  45.     for (int i=0; i<DIM; ++i) {
  46.         for (int j=0; j<DIM; ++j) {
  47.             A[i*DIM +j] = 1.0/(i+j+2.0);
  48.         }
  49.     }
  50.     /*
  51.     cerr << "A = [" << endl;
  52.     for (int i=0; i<size; ++i) cerr << A[i] << " ";
  53.     cerr << endl << "]" << endl;
  54.     */
  55.    
  56.     //void dgeev( const char* jobvl, const char* jobvr, const MKL_INT* n, double* a,
  57.     //        const MKL_INT* lda, double* wr, double* wi, double* vl,
  58.     //        const MKL_INT* ldvl, double* vr, const MKL_INT* ldvr, double* work,
  59.     //        const MKL_INT* lwork, MKL_INT* info );
  60.     dgeev(&jobvl, &jobvr, &n, A, &lda, wr, wi, vl, &ldvl, vr, &ldvr, work, &lwork, &info);
  61.  
  62.        
  63.     cerr << "A = [" << endl;
  64.     for (int i=0; i<10; ++i) cerr << A[i] << " ";
  65.     cerr << endl << "]" << endl;
  66.    
  67.     cerr << "wr = [" << endl;
  68.     for (int i=0; i<10; ++i) cerr << wr[i] << " " << endl;
  69.     cerr << endl << "]" << endl;
  70.    
  71.     cerr << "wi = [" << endl;
  72.     for (int i=0; i<10; ++i) cerr << wi[i] << " " << endl;
  73.     cerr << endl << "]" << endl;
  74.    
  75.    
  76.     return 0;
  77. }
  78.  
  79.  

Compiler avec :

icpc  -o mkl_eigen.exe mkl_eigen.cpp -O3 -msse2 -DMKL_ILP64 -openmp -Wl,--start-group /opt/intel/mkl/lib/intel64/libmkl_intel_ilp64.a /opt/intel/mkl/lib/intel64/libmkl_core.a /opt/intel/mkl/lib/intel64/libmkl_intel_thread.a -Wl,--end-group -lpthread -lm

3.3.5. GNU GSL

The GNU Scientific Library (GSL) is a numerical library for C and C++ programmers. It is free software under the GNU General Public License.

The library provides a wide range of mathematical routines such as random number generators, special functions and least-squares fitting. There are over 1000 functions in total with an extensive test suite.

Sous Ubuntu, installer les packages : libgsl0ldbl et libgsl0-dev

3.3.5.a  Produit de matrices

  1. #include <iostream>
  2. #include <numeric>
  3. using namespace std;
  4. #include <gsl/gsl_blas.h>
  5. #include <xmmintrin.h>
  6.  
  7. /*
  8. extern "C" int  gsl_blas_dgemm (CBLAS_TRANSPOSE_t TransA,
  9.                      CBLAS_TRANSPOSE_t TransB,
  10.                      double alpha,
  11.                      const gsl_matrix * A,
  12.                      const gsl_matrix * B,
  13.                      double beta,
  14.                      gsl_matrix * C);
  15.  */
  16. int DIM = 1000;
  17. double *A;
  18. double *B;
  19. double *C;
  20.                  
  21. int main(int argc, char *argv[]) {
  22.     char TRANS = 'N';
  23.     double alpha = 1.0;
  24.     double beta = 0.0;
  25.  
  26.     srand(19702013);
  27.    
  28.     if (argc > 1) {
  29.         DIM = atoi(argv[1]);
  30.     }
  31.    
  32.     int size = DIM*DIM;
  33.    
  34.     // allocate
  35.     A = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
  36.     B = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
  37.     C = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
  38.    
  39.     // fill
  40.     for (int i=0; i<size; ++i) {
  41.         A[i] = (rand() % 131) * 0.01;
  42.         B[i] = (rand() % 131) * 0.01;
  43.     }
  44.  
  45.     gsl_matrix_view a = gsl_matrix_view_array(A, DIM, DIM);
  46.     gsl_matrix_view b = gsl_matrix_view_array(B, DIM, DIM);
  47.     gsl_matrix_view c = gsl_matrix_view_array(C, DIM, DIM);
  48.    
  49.     gsl_blas_dgemm(CblasNoTrans, CblasNoTrans,
  50.         alpha, &a.matrix, &b.matrix, beta, &c.matrix);
  51.  
  52.     // check result
  53.     double sum = accumulate(&C[0], &C[size], 0.0);
  54.    
  55.     cout << "sum = " << sum << endl;
  56.    
  57.     _mm_free(A);
  58.     _mm_free(B);
  59.     _mm_free(C);
  60.     return 0;
  61. }
  62.  

3.3.5.b  Exemple de régression linéaire multiple

  1. // g++ -o gsl_regr_lin_mult.exe gsl_regr_lin_mult.cpp -Wall -lgsl -lgslcblas
  2. // export LD_LIBRARY_PATH=/usr/local/lib
  3. #include <stdio.h>
  4. #include <gsl/gsl_matrix.h>
  5. #include <gsl/gsl_math.h>
  6. #include <gsl/gsl_multifit.h>
  7.  
  8. #include <iostream>
  9. #include <iomanip>
  10. using namespace std;
  11.  
  12. // structure used to store Data, we want to predict
  13. // for each person (name), the weight in function of
  14. // gender, age, height:
  15. // weight = c0 + c1 * gender + c2 * age + c3 * height
  16. // we have 5 persons so N=5
  17. // we have 4 coefficients to find: c0, c1, c2, c3, P=4
  18. // we should obtain: -337.945  54.6301  2.96528  5.37169
  19. typedef struct {
  20.     string name;
  21.     int gender; // 1 male, 0 female
  22.     int age;    // years
  23.     int height; // inches
  24.     int weight; // pounds
  25. } Data;
  26.  
  27. Data data[] = {
  28.     {"Joe ", 1,  25,     72,     178},
  29.     {"Jill", 0,  32,     68,     122},
  30.     {"Jack", 1,  27,     69,     167},
  31.     {"John", 1,  45,     67,     210},
  32.     {"Jane", 0,  38,     62,     108}
  33. };
  34.  
  35. int main() {
  36.     int N = sizeof(data) / sizeof(Data);
  37.     int P = 4; // cste, gender, age, weight
  38.    
  39.     cout << "N = " << N << endl;
  40.     cout << "P = " << P << endl;
  41.    
  42.  
  43.     gsl_vector *y; // observed data (height)
  44.     gsl_matrix *X; // data used to predict : cste, gender, age, weight
  45.     gsl_vector *c; // the coefficients c0, c1, c2, c3
  46.     gsl_matrix *cov;
  47.  
  48.     // allocate space for the matrices and vectors
  49.     X = gsl_matrix_alloc(N, P); // this is an input
  50.     y = gsl_vector_alloc(N); //this is an input
  51.  
  52.     c = gsl_vector_alloc(P); //this is an output
  53.     cov = gsl_matrix_alloc(P, P); //this is an output
  54.  
  55.     //now put the data into the X matrix, row by row
  56.     for (int i=0; i<N; ++i) {
  57.         gsl_matrix_set(X, i, 0, static_cast<double>(1)); // because cste
  58.         gsl_matrix_set(X, i, 1, static_cast<double>(data[i].gender));
  59.         gsl_matrix_set(X, i, 2, static_cast<double>(data[i].age));
  60.         gsl_matrix_set(X, i, 3, static_cast<double>(data[i].height));
  61.     }
  62.    
  63.     // fill vector of observed data
  64.     for (int i=0; i<N; ++i) {
  65.         gsl_vector_set(y, i, data[i].weight);
  66.     }
  67.    
  68.     double chisq;
  69.  
  70.     // allocate temporary work space for gsl
  71.     gsl_multifit_linear_workspace *work;
  72.     work = gsl_multifit_linear_alloc(N, P);
  73.  
  74.     // now do the fit
  75.     gsl_multifit_linear(X, y, c, cov, &chisq, work);
  76.  
  77.     cout << "coefficients:" << endl;
  78.     for (int j = 0; j<P; j++) {
  79.         cout << "c" << j << " = " << std::setprecision(9);
  80.         cout << gsl_vector_get(c, j) << endl;
  81.     }
  82.    
  83.     cout << endl;
  84.     cout << "person, observed weight <=> predicted weight" << endl;
  85.     double err = 0.0, errsq = 0.0;
  86.     for (int i=0; i<N; ++i) {
  87.         double r = gsl_vector_get(c, 0);
  88.         r += data[i].gender * gsl_vector_get(c, 1);
  89.         r += data[i].age * gsl_vector_get(c, 2);
  90.         r += data[i].height * gsl_vector_get(c, 3);
  91.         cout << data[i].name << ", " << data[i].weight;
  92.         cout << " <=> " << std::setprecision(9) << r << endl;
  93.         err += fabs(data[i].weight - r);
  94.         errsq += fabs(data[i].weight - r) * fabs(data[i].weight - r);
  95.     }
  96.     cout << "error = " << err << endl;
  97.     cout << "errorsq = " << errsq << endl;
  98.     cout << endl;
  99.     cout << "chi square = " << chisq << endl;
  100.     cout << "covariance matrix:" << endl;
  101.     for (int i=0; i<P; ++i) {
  102.         for (int j = 0; j<P; j++) {
  103.             cout.setf( std::ios::fixed, std:: ios::floatfield );
  104.             cout << gsl_matrix_get(cov, i, j) << " ";
  105.         }
  106.         cout << endl;
  107.     }
  108.    
  109.     //************************************
  110.     //very important stuff at the end here
  111.     //************************************
  112.     // don't forget to deallocate - there is no garbage collector in C!
  113.  
  114.     gsl_matrix_free(X);
  115.     gsl_matrix_free(cov);
  116.     gsl_vector_free(y);
  117.     gsl_vector_free(c);
  118.  
  119.     return 0;
  120. }
  121.  
N = 5
P = 4
coefficients:
c0 = -337.944703
c1 = 54.6301002
c2 = 2.96528275
c3 = 5.37168933

person, observed weight <=> predicted weight
Joe , 178 <=> 177.579098
Jill, 122 <=> 122.21922
Jack, 167 <=> 167.394596
John, 210 <=> 210.026306
Jane, 108 <=> 107.78078
error = 1.28024338
errorsq = 0.429670723

chi square = 0.429670723
covariance matrix:
165.856820545 7.759985511 -0.613595628 -2.217940588 
7.759985511 0.773607220 -0.023221288 -0.110185781 
-0.613595628 -0.023221288 0.003537017 0.007535385 
-2.217940588 -0.110185781 0.007535385 0.030064648 

3.3.5.c  Exemple de calcul avec valeurs propres

  1. #include <iostream>
  2. #include <numeric>
  3. using namespace std;
  4. #include <gsl/gsl_blas.h>
  5. #include <gsl/gsl_matrix.h>
  6. #include <gsl/gsl_eigen.h>
  7. #include <xmmintrin.h>
  8. #include <omp.h>
  9.  
  10. int DIM = 10;
  11. int num_threds = 1;
  12.  
  13. gsl_matrix *A;
  14. gsl_matrix *B;
  15. gsl_vector *v;
  16. gsl_eigen_symmv_workspace *wrk;
  17.                  
  18. int main(int argc, char *argv[]) {
  19.     char TRANS = 'N';
  20.     double alpha = 1.0;
  21.     double beta = 0.0;
  22.  
  23.     srand(19702013);
  24.      
  25.     if (argc > 1) {
  26.         DIM = atoi(argv[1]);
  27.     }
  28.    
  29.     if (argc > 2) {
  30.         omp_set_num_threads(atoi(argv[2]));
  31.     }
  32.    
  33.     int size = DIM*DIM;
  34.    
  35.     A = gsl_matrix_alloc(DIM, DIM);
  36.     for (int i=0; i<DIM; ++i) {
  37.         for (int j=0; j<DIM; ++j) {
  38.             gsl_matrix_set(A, i, j, 1.0/(i+j+2.0));
  39.         }
  40.     }
  41.    
  42.     /*
  43.     cerr << "A = [" << endl;
  44.     for (int i=0; i<DIM; ++i) {
  45.         for (int j=0; j<DIM; j++) {
  46.             cerr << gsl_matrix_get(A, j, i) << " ";
  47.         }
  48.         cerr << endl;
  49.     }
  50.     cerr << "]" << endl;
  51.     */
  52.    
  53.     v = gsl_vector_alloc(DIM);
  54.     B = gsl_matrix_alloc(DIM, DIM);
  55.    
  56.     wrk = gsl_eigen_symmv_alloc(DIM);
  57.    
  58.     gsl_eigen_symmv(A, v, B, wrk);
  59.    
  60.    
  61.     cerr << endl << "v = [";
  62.     for (int i=0; i<DIM; ++i) {
  63.         cerr << gsl_vector_get(v, i) << " ";
  64.     }
  65.     cerr << "]";
  66.    
  67.    
  68.     gsl_eigen_symmv_free(wrk);
  69.     gsl_vector_free(v);
  70.     gsl_matrix_free(A);
  71.     gsl_matrix_free(B);
  72.    
  73.     return 0;
  74. }
  75.  

Compiler avec :

g++ -o gsl_eigen.exe gsl_eigen.cpp -O2 -lgsl -lgslcblas -lm

Résultat :

./gsl_eigen.exe 3
0: 0.875115 0.743136 0.527843 0.411255 
1: 0.0409049 0.638787 -0.376612 -0.670906 
2: 0.000646659 0.19925 -0.761278 0.617053

3.3.6. PETSc

PETSc is a suite of data structures and routines for the scalable (parallel) solution of scientific applications modeled by partial differential equations. It supports MPI, shared memory pthreads, and GPUs through CUDA or OpenCL, as well as hybrid MPI-shared memory pthreads or MPI-GPU parallelism

3.3.6.a  Exemple produit de matrices

  1. #include <petscmat.h>
  2. #include <petscsys.h>
  3. #include <xmmintrin.h>
  4. #include <iostream>
  5. using namespace std;
  6.  
  7.  
  8. PetscInt DIM = 1000;
  9.        
  10. static char help[] = "Empty program.\n\n";
  11.                  
  12. int main(int argc, char *argv[]) {
  13.     PetscMPIInt    rank;
  14.     PetscErrorCode ierr;
  15.     Mat A, B, C;
  16.     PetscScalar      fill=4.0;
  17.  
  18.     srand(19702013);
  19.    
  20.     if (argc > 1) {
  21.         DIM = atoi(argv[1]);
  22.     }
  23.    
  24.     ierr = PetscInitialize(&argc,&argv,(char *)0,help); CHKERRQ(ierr);
  25.  
  26.     // get rank from MPI ordering:
  27.     MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
  28.     PetscOptionsGetInt(NULL,"-size",&DIM,NULL);
  29.    
  30.     int size = DIM*DIM;
  31.    
  32.     PetscScalar *a, *b;
  33.    
  34.     PetscMalloc1(size*sizeof(PetscScalar), &a);
  35.     PetscMalloc1(size*sizeof(PetscScalar), &b);
  36.    
  37.     // fill
  38.     for (int i=0; i<size; ++i) {
  39.         a[i] = (rand() % 131) * 0.01;
  40.         b[i] = (rand() % 131) * 0.01;
  41.     }
  42.    
  43.     MatCreate(MPI_COMM_SELF, &A);
  44.     MatSetSizes(A,DIM, DIM, DIM, DIM);
  45.     MatSetType(A,MATSEQDENSE);
  46.     MatSeqDenseSetPreallocation(A,a);
  47.     MatSeqDenseSetLDA(A,DIM);
  48.     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  49.     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  50.  
  51.     MatCreate(MPI_COMM_SELF, &B);
  52.     MatSetSizes(B,DIM, DIM, DIM, DIM);
  53.     MatSetType(B,MATSEQDENSE);
  54.     MatSeqDenseSetPreallocation(B,b);
  55.     MatSeqDenseSetLDA(B,DIM);
  56.     MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
  57.     MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
  58.    
  59.     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,fill,&C);CHKERRQ(ierr);
  60.    
  61.     /*if (rank == 0) {
  62.         cout << "rank = " << rank << endl;
  63.         MatView(A, PETSC_VIEWER_STDOUT_SELF);
  64.         MatView(B, PETSC_VIEWER_STDOUT_SELF);
  65.         MatView(C, PETSC_VIEWER_STDOUT_SELF);
  66.     }*/
  67.    
  68.     PetscFree(a);
  69.     PetscFree(b);
  70.     MatDestroy(&A);
  71.     MatDestroy(&B);
  72.     MatDestroy(&C);
  73.      
  74.     ierr = PetscFinalize();CHKERRQ(ierr);
  75.      
  76.     return 0;
  77. }
  78.  

3.3.7. Comparaison BLAS, GSL, MKL, PETSC

3.3.7.a  Produit de matrices

 method / dimension   1000   2000   4000   8000 
 blas   0.63   5.24   43.14   349.87 
 gsl   0.73    7.96    67.09   518.74 
 mkl (1 thread)   0.07   1.40   3.28   22.28 
 mkl (2 threads)   0.04   1.05   2.18   12.35 
 mkl (4 threads)   0.04   0.86    1.66   6.98 
 petsc (4 threads)   3.24   26.49   3m36   30m(*) 
Produit de matrices carrées : temps de calcul (en secondes) pour différentes librairies sur Intel i5-4570 CPU at 3.20GHz 64 bits (double)

(*) Note : valeur estimée (facteur 8.15)

3.3.7.b  Valeurs propres

 method / dimension   1000   2000   4000   8000 
 gsl (1 thread)   3.92   41.54   89.82   > 1h 
 mkl (1 thread)   1.25   11.70   88.65   665.09 
 mkl (2 threads)   0.92   9.70   74.93   658.76 
 mkl (4 threads)   0.82   9.35   72.12   582.31 
 mkl (4 threads) (1)   0.58   6.68   51.04   397.83 
 python (1 thread)   1.62   10.21   75.20   457.58 
 python (2 threads) + openblas (2)   1.98   9.57   55.71   375.14 
 python (4 threads) + openblas (3)   1.93   9.98   57.81     385.33 
Valeurs propres : temps de calcul pour différentes librairies sur Intel i5-4570 CPU at 3.20GHz 64 bits (double)