3. C++ et le monde extérieur

3.3. Librairies dédiées au calcul scientifique

3.3.1. Complex<T> de la STL

Pour les calculs avec des nombres complexes on peut utiliser la STL qui dispose d'un type Complex<T> à instancier.

CODE
complex.cpp

#include <iostream>
#include <complex>
#include <cstdlib>
#include <valarray>
using namespace std;
 
typedef complex<double> Complex;
 
int main() {
    Complex c2(2, 0);
    Complex ci(0, 1);
 
    cout << ci << " + " << c2 << " = " << ci+c2 << '\n'
         << ci << " * " << ci << " = " << ci*ci << '\n'
         << ci << " + " << c2 << " / " << ci << " = " << ci+c2/ci << '\n'
         << 1  << " / " << ci << " = " << 1./ci << '\n';
 
    cout << "norm of c2 = " << norm(c2) << endl;
    cout << "norm of ci = " << norm(ci) << endl;
     
    cout << "real(c2) = " << real(c2) << endl;
    cout << "imag(ci) = " << imag(ci) << endl;
     
     
    valarray<complex<double> > v(10);
    for (size_t i = 0; i <v.size(); ++i) {
        v[i].real( static_cast<double>(rand() % 1000) / 1000);
        v[i].imag( static_cast<double>(rand() % 1000) / 1000);
    }
    
    for (auto x : v) {  
        cout << x << endl;
    }
    complex<double> sc = v.sum();
    cout << "===========" << endl;
    cout << "sum = " << sc << endl;
     
    exit(EXIT_SUCCESS); 
}
 

(0,1) + (2,0) = (2,1)
(0,1) * (0,1) = (-1,0)
(0,1) + (2,0) / (0,1) = (0,-1)
1 / (0,1) = (0,-1)
norm of c2 = 4
norm of ci = 1
real(c2) = 2
imag(ci) = 1
(0.383,0.886)
(0.777,0.915)
(0.793,0.335)
(0.386,0.492)
(0.649,0.421)
(0.362,0.027)
(0.69,0.059)
(0.763,0.926)
(0.54,0.426)
(0.172,0.736)
===========
sum = (5.515,5.223)

3.3.2. BLAS

BLAS (Basic Linear Algebra Subprograms) est une librairie de sous-programmes optimisés pour calcul matriciel :

BLAS niveau 1 : scalar, vector et vector-vector
BLAS niveau 2 : matrix-vector
BLAS niveau 3 : matrix-matrix

BLAS Reference Card

Sous Ubuntu, installer les packages : libblas3 libblas-dev

Exemple produit de matrices avec double DGEMM (Double GEneral Matrix Matrix) :

$$ C ← α op(A) × op(B) + β C $$

où $α$ et $β$ sont des réels et $op$ est soit l'identité, la transposée ou la transposée conjuguée.

CODE
blas_matrix_product.cpp

#include <iostream>
#include <numeric>
using namespace std;
#include <cblas.h>
#include <xmmintrin.h>
 
/*
extern "C" void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
                 const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
                 const int K, const double alpha, const double *A,
                 const int lda, const double *B, const int ldb,
                 const double beta, double *C, const int ldc);
 */
int DIM = 1000;
double *A;
double *B;
double *C;
                 
int main(int argc, char *argv[]) {
    char TRANS = 'N';
    double alpha = 1.0;
    double beta = 0.0;
 
    srand(19702013);
    
    if (argc > 1) {
        DIM = atoi(argv[1]);
    }
    
    int size = DIM*DIM;
    
    // allocate
    A = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
    B = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
    C = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
    
    // fill
    for (int i=0; i<size; ++i) {
        A[i] = (rand() % 131) * 0.01;
        B[i] = (rand() % 131) * 0.01;
    }
    
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, DIM, DIM, DIM, 
        alpha, A, DIM, B, DIM, beta, C, DIM);
 
    // check result
    double sum = accumulate(&C[0], &C[size], 0.0);
    
    cout << "sum = " << sum << endl;
    
    _mm_free(A);
    _mm_free(B);
    _mm_free(C);
    return 0;
}
 

3.3.3. LAPACK

LAPACK (or Linear Algebra PACKage) provides routines :

for solving systems of simultaneous linear equations
least-squares solutions of linear systems of equations
eigenvalue problems, and singular value problems

Sous Ubuntu installer les packages liblapack3 (dynamique) ou liblapack-dev (statique)

3.3.4. Intel MKL (Math Kernel Library)

Intel Math Kernel Library (Intel® MKL) includes a wealth of math processing routines to accelerate application performance and reduce development time. Intel® MKL includes highly vectorized and threaded Linear Algebra, Fast Fourier Transforms (FFT), Vector Math and Statistics functions. The easiest way to take advantage of all of that processing power is to use a carefully optimized computing math library. Even the best compiler can’t compete with the level of performance possible from a hand-optimized library. If your application already relies on the BLAS or LAPACK functionality, simply re-link with Intel® MKL to get better performance on Intel and compatible architectures.

Voir MKL Cookbook et MKL Link Advisor

3.3.4.a Exemple produit de matrix avec double

On utilise encore DGEMM de MKL :

CODE
mkl_matrix_product.cpp

/*
source /opt/intel/bin/iccvars.sh intel64
icpc -o mkl_matrix_product.exe mkl_matrix_product.cpp -O3 -msse2 -Wl,--start-group /opt/intel/mkl/lib/intel64/libmkl_intel_ilp64.a /opt/intel/mkl/lib/intel64/libmkl_core.a /opt/intel/mkl/lib/intel64/libmkl_sequential.a -Wl,--end-group -lpthread -lm
 
icpc -o mkl_matrix_product.exe mkl_matrix_product.cpp -O3 -msse2 -DMKL_ILP64 -openmp -Wl,--start-group /opt/intel/mkl/lib/intel64/libmkl_intel_ilp64.a /opt/intel/mkl/lib/intel64/libmkl_core.a /opt/intel/mkl/lib/intel64/libmkl_intel_thread.a -Wl,--end-group -lpthread -lm
 
*/
#include <iostream>
#include <numeric>
using namespace std;
#include <mkl.h>
#include <xmmintrin.h>
 
/*
extern "C" void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
                 const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
                 const int K, const double alpha, const double *A,
                 const int lda, const double *B, const int ldb,
                 const double beta, double *C, const int ldc);
 */
int DIM = 1000;
double *A;
double *B;
double *C;
 
// ----------------------------------------------
// main function
// ----------------------------------------------
int main(int argc, char *argv[]) {
    char TRANS = 'N';
    double alpha = 1.0;
    double beta = 0.0;
 
    srand(19702013);
    
    if (argc > 1) {
        DIM = atoi(argv[1]);
    }
    
    if (argc > 2) {
        mkl_set_num_threads(atoi(argv[2]));
    }
    
    int size = DIM*DIM;
    
    // allocate
    A = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
    B = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
    C = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
    
    // fill
    for (int i=0; i<size; ++i) {
        A[i] = (rand() % 131) * 0.01;
        B[i] = (rand() % 131) * 0.01;
    }
    
    // ------------------------------------------
    // perform product with MKL
    // ------------------------------------------
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, DIM, DIM, DIM, 
        alpha, A, DIM, B, DIM, beta, C, DIM);
 
 
    // check result
    double sum = accumulate(&C[0], &C[size], 0.0);
    
    cout << "sum = " << sum << endl;
    
    _mm_free(A);
    _mm_free(B);
    _mm_free(C);
    return 0;
}
 

3.3.4.b Exemple de valeurs propres

On utilise DGEEV

CODE
mkl_eigen.cpp

#include <iostream>
#include <numeric>
using namespace std;
#include <mkl.h>
#include <mkl_lapack.h>
#include <xmmintrin.h> 
 
int DIM = 10;
double *A; 
double *work;
 
int main(int argc, char *argv[]) {
 
    srand(19702013);
    
    if (argc > 1) {
        DIM = atoi(argv[1]);
    }
    
    if (argc > 2) {
        mkl_set_num_threads(atoi(argv[2]));
    }
     
    MKL_INT n, lda, lwork, ldvl, ldvr, info, idxi, idxj;
    double *wr, *wi, *A, *work, *vl, *vr;
    char jobvl, jobvr;
    
    n = DIM;
    lda = DIM;
    ldvl = 1;
    ldvr = DIM;
    lwork = 4*DIM;
    jobvl = 'N'; // or 'V'
    jobvr = 'N'; // or 'V'
    
    int size = DIM*DIM;
    
    A = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
    wr = static_cast<double *>(_mm_malloc(DIM * sizeof(double), 16));
    wi = static_cast<double *>(_mm_malloc(DIM * sizeof(double), 16));
    vl = static_cast<double *>(_mm_malloc(DIM * ldvl * sizeof(double), 16));
    vr = static_cast<double *>(_mm_malloc(DIM * ldvr * sizeof(double), 16));
    work = static_cast<double *>(_mm_malloc(lwork * sizeof(double), 16));
 
    for (int i=0; i<DIM; ++i) {
        for (int j=0; j<DIM; ++j) {
            A[i*DIM +j] = 1.0/(i+j+2.0);
        }
    }
    /*
    cerr << "A = [" << endl;
    for (int i=0; i<size; ++i) cerr << A[i] << " ";
    cerr << endl << "]" << endl;
    */
    
    //void dgeev( const char* jobvl, const char* jobvr, const MKL_INT* n, double* a,
    //        const MKL_INT* lda, double* wr, double* wi, double* vl,
    //        const MKL_INT* ldvl, double* vr, const MKL_INT* ldvr, double* work,
    //        const MKL_INT* lwork, MKL_INT* info );
    dgeev(&jobvl, &jobvr, &n, A, &lda, wr, wi, vl, &ldvl, vr, &ldvr, work, &lwork, &info);
 
        
    cerr << "A = [" << endl;
    for (int i=0; i<10; ++i) cerr << A[i] << " ";
    cerr << endl << "]" << endl;
    
    cerr << "wr = [" << endl;
    for (int i=0; i<10; ++i) cerr << wr[i] << " " << endl;
    cerr << endl << "]" << endl;
    
    cerr << "wi = [" << endl;
    for (int i=0; i<10; ++i) cerr << wi[i] << " " << endl;
    cerr << endl << "]" << endl;
    
    
    return 0;
}
 
 

Compiler avec :

icpc  -o mkl_eigen.exe mkl_eigen.cpp -O3 -msse2 -DMKL_ILP64 -openmp -Wl,--start-group /opt/intel/mkl/lib/intel64/libmkl_intel_ilp64.a /opt/intel/mkl/lib/intel64/libmkl_core.a /opt/intel/mkl/lib/intel64/libmkl_intel_thread.a -Wl,--end-group -lpthread -lm

3.3.5. GNU GSL

The GNU Scientific Library (GSL) is a numerical library for C and C++ programmers. It is free software under the GNU General Public License.

The library provides a wide range of mathematical routines such as random number generators, special functions and least-squares fitting. There are over 1000 functions in total with an extensive test suite.

Sous Ubuntu, installer les packages : libgsl0ldbl et libgsl0-dev

3.3.5.a Produit de matrices

CODE
gsl_blas_matrix_product.cpp

#include <iostream>
#include <numeric>
using namespace std;
#include <gsl/gsl_blas.h>
#include <xmmintrin.h>
 
/*
extern "C" int  gsl_blas_dgemm (CBLAS_TRANSPOSE_t TransA,
                     CBLAS_TRANSPOSE_t TransB,
                     double alpha,
                     const gsl_matrix * A,
                     const gsl_matrix * B,
                     double beta,
                     gsl_matrix * C);
 */
int DIM = 1000;
double *A;
double *B;
double *C;
                 
int main(int argc, char *argv[]) {
    char TRANS = 'N';
    double alpha = 1.0;
    double beta = 0.0;
 
    srand(19702013);
    
    if (argc > 1) {
        DIM = atoi(argv[1]);
    }
    
    int size = DIM*DIM;
    
    // allocate
    A = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
    B = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
    C = static_cast<double *>(_mm_malloc(size * sizeof(double), 16));
    
    // fill
    for (int i=0; i<size; ++i) {
        A[i] = (rand() % 131) * 0.01;
        B[i] = (rand() % 131) * 0.01;
    }
 
    gsl_matrix_view a = gsl_matrix_view_array(A, DIM, DIM);
    gsl_matrix_view b = gsl_matrix_view_array(B, DIM, DIM);
    gsl_matrix_view c = gsl_matrix_view_array(C, DIM, DIM);
    
    gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 
        alpha, &a.matrix, &b.matrix, beta, &c.matrix);
 
    // check result
    double sum = accumulate(&C[0], &C[size], 0.0);
    
    cout << "sum = " << sum << endl;
    
    _mm_free(A);
    _mm_free(B);
    _mm_free(C);
    return 0;
}
 

3.3.5.b Exemple de régression linéaire multiple

CODE
gsl_regr_lin_mult.cpp

// g++ -o gsl_regr_lin_mult.exe gsl_regr_lin_mult.cpp -Wall -lgsl -lgslcblas
// export LD_LIBRARY_PATH=/usr/local/lib
#include <stdio.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_multifit.h>
 
#include <iostream>
#include <iomanip>
using namespace std;
 
// structure used to store Data, we want to predict
// for each person (name), the weight in function of
// gender, age, height:
// weight = c0 + c1 * gender + c2 * age + c3 * height
// we have 5 persons so N=5
// we have 4 coefficients to find: c0, c1, c2, c3, P=4
// we should obtain: -337.945  54.6301  2.96528  5.37169
typedef struct {
    string name;
    int gender; // 1 male, 0 female
    int age;    // years
    int height; // inches
    int weight; // pounds
} Data;
 
Data data[] = {
    {"Joe ", 1,  25,     72,     178},
    {"Jill", 0,  32,     68,     122},
    {"Jack", 1,  27,     69,     167},
    {"John", 1,  45,     67,     210},
    {"Jane", 0,  38,     62,     108}
};
 
int main() {
    int N = sizeof(data) / sizeof(Data);
    int P = 4; // cste, gender, age, weight
    
    cout << "N = " << N << endl;
    cout << "P = " << P << endl;
    
 
    gsl_vector *y; // observed data (height)
    gsl_matrix *X; // data used to predict : cste, gender, age, weight
    gsl_vector *c; // the coefficients c0, c1, c2, c3
    gsl_matrix *cov;
 
    // allocate space for the matrices and vectors
    X = gsl_matrix_alloc(N, P); // this is an input
    y = gsl_vector_alloc(N); //this is an input
 
    c = gsl_vector_alloc(P); //this is an output
    cov = gsl_matrix_alloc(P, P); //this is an output
 
    //now put the data into the X matrix, row by row
    for (int i=0; i<N; ++i) {
        gsl_matrix_set(X, i, 0, static_cast<double>(1)); // because cste
        gsl_matrix_set(X, i, 1, static_cast<double>(data[i].gender));
        gsl_matrix_set(X, i, 2, static_cast<double>(data[i].age));
        gsl_matrix_set(X, i, 3, static_cast<double>(data[i].height)); 
    }
    
    // fill vector of observed data
    for (int i=0; i<N; ++i) {
        gsl_vector_set(y, i, data[i].weight);
    }
    
    double chisq;
 
    // allocate temporary work space for gsl 
    gsl_multifit_linear_workspace *work;
    work = gsl_multifit_linear_alloc(N, P);
 
    // now do the fit
    gsl_multifit_linear(X, y, c, cov, &chisq, work);
 
    cout << "coefficients:" << endl;
    for (int j = 0; j<P; j++) {
        cout << "c" << j << " = " << std::setprecision(9);
        cout << gsl_vector_get(c, j) << endl;
    }
    
    cout << endl;
    cout << "person, observed weight <=> predicted weight" << endl;
    double err = 0.0, errsq = 0.0;
    for (int i=0; i<N; ++i) {
        double r = gsl_vector_get(c, 0);
        r += data[i].gender * gsl_vector_get(c, 1); 
        r += data[i].age * gsl_vector_get(c, 2);
        r += data[i].height * gsl_vector_get(c, 3);
        cout << data[i].name << ", " << data[i].weight;
        cout << " <=> " << std::setprecision(9) << r << endl;
        err += fabs(data[i].weight - r);
        errsq += fabs(data[i].weight - r) * fabs(data[i].weight - r);
    }
    cout << "error = " << err << endl;
    cout << "errorsq = " << errsq << endl;
    cout << endl;
    cout << "chi square = " << chisq << endl;
    cout << "covariance matrix:" << endl;
    for (int i=0; i<P; ++i) {
        for (int j = 0; j<P; j++) {
            cout.setf( std::ios::fixed, std:: ios::floatfield );
            cout << gsl_matrix_get(cov, i, j) << " "; 
        }
        cout << endl;
    }
    
    //************************************
    //very important stuff at the end here
    //************************************
    // don't forget to deallocate - there is no garbage collector in C!
 
    gsl_matrix_free(X);
    gsl_matrix_free(cov);
    gsl_vector_free(y);
    gsl_vector_free(c);
 
    return 0;
}
 

N = 5
P = 4
coefficients:
c0 = -337.944703
c1 = 54.6301002
c2 = 2.96528275
c3 = 5.37168933

person, observed weight <=> predicted weight
Joe , 178 <=> 177.579098
Jill, 122 <=> 122.21922
Jack, 167 <=> 167.394596
John, 210 <=> 210.026306
Jane, 108 <=> 107.78078
error = 1.28024338
errorsq = 0.429670723

chi square = 0.429670723
covariance matrix:
165.856820545 7.759985511 -0.613595628 -2.217940588 
7.759985511 0.773607220 -0.023221288 -0.110185781 
-0.613595628 -0.023221288 0.003537017 0.007535385 
-2.217940588 -0.110185781 0.007535385 0.030064648

3.3.5.c Exemple de calcul avec valeurs propres

CODE
gsl_eigen.cpp

#include <iostream>
#include <numeric>
using namespace std;
#include <gsl/gsl_blas.h>
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_eigen.h>
#include <xmmintrin.h>
#include <omp.h>
 
int DIM = 10; 
int num_threds = 1; 
 
gsl_matrix *A;
gsl_matrix *B;
gsl_vector *v;
gsl_eigen_symmv_workspace *wrk;
                 
int main(int argc, char *argv[]) {
    char TRANS = 'N';
    double alpha = 1.0;
    double beta = 0.0;
 
    srand(19702013);
     
    if (argc > 1) {
        DIM = atoi(argv[1]);
    }
    
    if (argc > 2) {
        omp_set_num_threads(atoi(argv[2]));
    }
    
    int size = DIM*DIM;
    
    A = gsl_matrix_alloc(DIM, DIM);
    for (int i=0; i<DIM; ++i) {
        for (int j=0; j<DIM; ++j) {
            gsl_matrix_set(A, i, j, 1.0/(i+j+2.0));
        }
    }
    
    /*
    cerr << "A = [" << endl;
    for (int i=0; i<DIM; ++i) {
        for (int j=0; j<DIM; j++) {
            cerr << gsl_matrix_get(A, j, i) << " ";
        }
        cerr << endl;
    }
    cerr << "]" << endl;
    */
    
    v = gsl_vector_alloc(DIM);
    B = gsl_matrix_alloc(DIM, DIM);
    
    wrk = gsl_eigen_symmv_alloc(DIM);
    
    gsl_eigen_symmv(A, v, B, wrk);
    
    
    cerr << endl << "v = [";
    for (int i=0; i<DIM; ++i) {
        cerr << gsl_vector_get(v, i) << " ";
    }
    cerr << "]";
    
    
    gsl_eigen_symmv_free(wrk);
    gsl_vector_free(v);
    gsl_matrix_free(A);
    gsl_matrix_free(B);
    
    return 0;
}
 

Compiler avec :

g++ -o gsl_eigen.exe gsl_eigen.cpp -O2 -lgsl -lgslcblas -lm

Résultat :

./gsl_eigen.exe 3
0: 0.875115 0.743136 0.527843 0.411255 
1: 0.0409049 0.638787 -0.376612 -0.670906 
2: 0.000646659 0.19925 -0.761278 0.617053

3.3.6. PETSc

PETSc is a suite of data structures and routines for the scalable (parallel) solution of scientific applications modeled by partial differential equations. It supports MPI, shared memory pthreads, and GPUs through CUDA or OpenCL, as well as hybrid MPI-shared memory pthreads or MPI-GPU parallelism

developé par Argonne National Laboratory depuis 1991
surcouche MPI
très difficile à utiliser

3.3.6.a Exemple produit de matrices

CODE
petsc_matrix_product.cpp

#include <petscmat.h> 
#include <petscsys.h>
#include <xmmintrin.h>
#include <iostream>
using namespace std;
 
 
PetscInt DIM = 1000;
       
static char help[] = "Empty program.\n\n";
                 
int main(int argc, char *argv[]) {
    PetscMPIInt    rank;
    PetscErrorCode ierr;
    Mat A, B, C;
    PetscScalar      fill=4.0;
 
    srand(19702013);
    
    if (argc > 1) {
        DIM = atoi(argv[1]);
    }
    
    ierr = PetscInitialize(&argc,&argv,(char *)0,help); CHKERRQ(ierr);
 
    // get rank from MPI ordering:
    MPI_Comm_rank(PETSC_COMM_WORLD, &rank);
    PetscOptionsGetInt(NULL,"-size",&DIM,NULL);
    
    int size = DIM*DIM;
    
    PetscScalar *a, *b;
    
    PetscMalloc1(size*sizeof(PetscScalar), &a);
    PetscMalloc1(size*sizeof(PetscScalar), &b);
    
    // fill
    for (int i=0; i<size; ++i) {
        a[i] = (rand() % 131) * 0.01;
        b[i] = (rand() % 131) * 0.01;
    }
    
    MatCreate(MPI_COMM_SELF, &A);
    MatSetSizes(A,DIM, DIM, DIM, DIM);
    MatSetType(A,MATSEQDENSE);
    MatSeqDenseSetPreallocation(A,a);
    MatSeqDenseSetLDA(A,DIM);
    MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
 
    MatCreate(MPI_COMM_SELF, &B);
    MatSetSizes(B,DIM, DIM, DIM, DIM);
    MatSetType(B,MATSEQDENSE);
    MatSeqDenseSetPreallocation(B,b);
    MatSeqDenseSetLDA(B,DIM);
    MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
    
    ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,fill,&C);CHKERRQ(ierr);
    
    /*if (rank == 0) {
        cout << "rank = " << rank << endl;
        MatView(A, PETSC_VIEWER_STDOUT_SELF);
        MatView(B, PETSC_VIEWER_STDOUT_SELF);
        MatView(C, PETSC_VIEWER_STDOUT_SELF);
    }*/
    
    PetscFree(a);
    PetscFree(b);
    MatDestroy(&A);
    MatDestroy(&B);
    MatDestroy(&C);
     
    ierr = PetscFinalize();CHKERRQ(ierr);
     
    return 0;
}
 

3.3.7. Comparaison BLAS, GSL, MKL, PETSC

3.3.7.a Produit de matrices

Produit de matrices carrées : temps de calcul (en secondes) pour différentes librairies sur Intel i5-4570 CPU at 3.20GHz 64 bits (double)
method / dimension	1000	2000	4000	8000
blas	0.63	5.24	43.14	349.87
gsl	0.73	7.96	67.09	518.74
mkl (1 thread)	0.07	1.40	3.28	22.28
mkl (2 threads)	0.04	1.05	2.18	12.35
mkl (4 threads)	0.04	0.86	1.66	6.98
petsc (4 threads)	3.24	26.49	3m36	30m(*)

(*) Note : valeur estimée (facteur 8.15)

3.3.7.b Valeurs propres

Valeurs propres : temps de calcul pour différentes librairies sur Intel i5-4570 CPU at 3.20GHz 64 bits (double)
method / dimension	1000	2000	4000	8000
gsl (1 thread)	3.92	41.54	89.82	> 1h
mkl (1 thread)	1.25	11.70	88.65	665.09
mkl (2 threads)	0.92	9.70	74.93	658.76
mkl (4 threads)	0.82	9.35	72.12	582.31
mkl (4 threads) ⁽¹⁾	0.58	6.68	51.04	397.83
python (1 thread)	1.62	10.21	75.20	457.58
python (2 threads) + openblas ⁽²⁾	1.98	9.57	55.71	375.14
python (4 threads) + openblas ⁽³⁾	1.93	9.98	57.81	385.33

(1) : solution MKL avec jobvl = jobvr = 'N', les vecteurs droit et gauche ne sont pas calculés ce qui prend moins de temps.
(2) : avec la solution python + openblas on devient compétitif par rapport à MKL à mesure que la dimension de la matrice augmente.
(3) : avec la solution python + openblas l'utilisation de 4 threads est moins efficace qu'avec 2 threads ce qui semble étrange.

Page de Jean-Michel Richer