/*
 * Element-wise sum of two float arrays: z[i] = x[i] + y[i] for 0 <= i < size.
 *
 * Each thread handles at most one element, selected by its linear global
 * thread index. The index is computed from all three grid and block
 * dimensions, so the kernel works for 1D, 2D and 3D launch configurations;
 * unused dimensions have extent 1 and contribute nothing.
 *
 * The launch must provide at least `size` threads in total, otherwise the
 * trailing elements of z are left untouched.
 *
 * x, y  input arrays, at least `size` elements each
 * z     output array, at least `size` elements
 * size  number of elements to process; values <= 0 make the kernel a no-op
 */
__global__ void kernel_sum(float *x, float *y, float *z, int size)
{
    /* Reject empty/negative sizes before the unsigned comparison below. */
    if (size <= 0) {
        return;
    }

    /*
     * Linearize block and thread coordinates (x fastest, z slowest).
     * 64-bit arithmetic avoids overflow for very large launches.
     */
    const unsigned long long block_id =
        blockIdx.x +
        (unsigned long long)gridDim.x *
            (blockIdx.y + (unsigned long long)gridDim.y * blockIdx.z);

    const unsigned long long threads_per_block =
        (unsigned long long)blockDim.x * blockDim.y * blockDim.z;

    const unsigned long long thread_in_block =
        threadIdx.x +
        (unsigned long long)blockDim.x *
            (threadIdx.y + (unsigned long long)blockDim.y * threadIdx.z);

    /* Global thread index across the whole grid. */
    const unsigned long long gtid =
        block_id * threads_per_block + thread_in_block;

    /* Threads beyond the end of the arrays do nothing. */
    if (gtid < (unsigned long long)size) {
        z[gtid] = x[gtid] + y[gtid]; /* parallel part: one element per thread */
    }
}