; ------------------------------------------------------------------
; Author: Jean-Michel RICHER
; Email: jean-michel.richer@univ-angers.fr
; Date: September 2018
; ------------------------------------------------------------------
;
; version with .text section writable
; so the instruction in the .text section is modified
;
%include "src/asm_config.inc"

global asm_sse_sort

; ==============
; ==== DATA ====
; ==============
section .data

; -------------------------------------
; table of values for PSHUFD
;
; 256 one-byte entries. asm_sse_sort indexes the table with
;
;     id = (mask1 << 4) | mask2
;
; where, for the four input values t[0..3] (signed compares):
;     mask1 bit j = 1  when  t[(j+1) & 3] > t[j]
;     mask2 bit j = 1  when  t[(j+2) & 3] > t[j]
;
; Each entry is the PSHUFD imm8 that puts the values in ascending
; order: bits [2i+1:2i] hold the index of the input element that goes
; to output position i (228 = 0xE4 is the identity shuffle).
;
; The table is laid out as 16 rows of 16 bytes: row = mask1,
; column = mask2. Only columns 0,1,2,3,4,6,8,9,C can occur, because
; t[2] > t[0] and t[0] > t[2] (and likewise t[3] / t[1]) exclude each
; other. Entries left at 0 (other than the ones listed) correspond to
; comparison patterns that no input produces.
;
; Columns 0,1,2,4,8 are the patterns where t[0] == t[2] and/or
; t[1] == t[3]; every such reachable pattern needs its own entry,
; otherwise PSHUFD 0 would broadcast t[0] to all four lanes.
align 16
pshufd_table:
    ;   mask2:  0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
    db        228,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0  ; mask1 = 0
    db          0, 228,   0,   0,   0,   0,   0,   0, 120, 108,   0,   0,   0,   0,   0,   0  ; mask1 = 1
    db          0, 180, 225, 177,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0  ; mask1 = 2
    db          0, 180,   0, 180,   0,   0,   0,   0,   0, 156,   0,   0,   0,   0,   0,   0  ; mask1 = 3
    db          0,   0, 228,   0, 210,   0, 198,   0,   0,   0,   0,   0,   0,   0,   0,   0  ; mask1 = 4
    db        216, 216, 216, 216, 210,   0, 210,   0, 120, 120,   0,   0, 114,   0,   0,   0  ; mask1 = 5
    db          0,   0, 225, 225,   0,   0, 201,   0,   0,   0,   0,   0,   0,   0,   0,   0  ; mask1 = 6
    db          0,   0,   0, 228,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0  ; mask1 = 7
    db          0,   0,   0,   0,  57,   0,   0,   0, 147,   0,   0,   0,  27,   0,   0,   0  ; mask1 = 8
    db          0,   0,   0,   0,   0,   0,   0,   0,  99,  99,   0,   0,  75,   0,   0,   0  ; mask1 = 9
    db        141, 141, 141, 141,  45,   0,  45,   0, 135, 135,   0,   0,  39,   0,   0,   0  ; mask1 = A
    db          0,   0,   0,   0,   0,   0,   0,   0,   0, 147,   0,   0,   0,   0,   0,   0  ; mask1 = B
    db          0,   0,   0,   0,  54,   0,  54,   0,   0,   0,   0,   0,  30,   0,   0,   0  ; mask1 = C
    db          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  78,   0,   0,   0  ; mask1 = D
    db          0,   0,   0,   0,   0,   0,  57,   0,   0,   0,   0,   0,   0,   0,   0,   0  ; mask1 = E
    db          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0  ; mask1 = F

; ==============
; ==== TEXT ====
; ==============
section .text

; ------------------------------------------
; !!!!!!!!!!!!!!!!!! Note !!!!!!!!!!!!!!!!!!
; this is a fast call subprogram so first
; parameter t is placed in ECX in 32 bits
; for GCC/G++
;
; void asm_sse_sort(int *t)
;
; ------------------------------------------
; Sorts the four signed 32-bit integers at t in ascending order,
; in place, with a single table-driven PSHUFD.
;
; In:     ecx = t, address of 4 consecutive dwords (loaded and stored
;         with MOVDQU, so no alignment is required)
; Out:    t[0..3] rewritten in shuffled order; no return value
; Clobb:  eax, edx, xmm0, xmm1, xmm2, flags
; Needs:  SSE2 and BMI2 (PEXT)
;
; The shuffle constant cannot be passed to PSHUFD in a register, so
; the immediate byte of the PSHUFD below is overwritten on every call
; (self-modifying code). Consequences:
;   - the .text section must be mapped writable, as stated in the
;     file header;
;   - the patched byte is shared by all callers, so concurrent calls
;     from several threads race on it.
; ------------------------------------------

; PSHUFD xmm0, xmm0, imm8 encodes as 66 0F 70 C0 ib: the immediate is
; the fifth byte of the instruction.
PSHUFD_IMM_OFFSET   equ 4

; one bit per dword lane out of two packed 16-bit PMOVMSKB results
LANE_BITS_MASK      equ 0x11111111

asm_sse_sort:
        movdqu      xmm0, [ecx]                 ; xmm0 = t[3] : t[2] : t[1] : t[0]

        ; xmm1 = t rotated by one lane, xmm2 = t rotated by two lanes
        pshufd      xmm1, xmm0, 00111001b       ; 0x39
        pshufd      xmm2, xmm0, 01001110b       ; 0x4E

        ; lane j of xmm1 = all ones when t[(j+1)&3] > t[j]
        ; lane j of xmm2 = all ones when t[(j+2)&3] > t[j]
        pcmpgtd     xmm1, xmm0
        pcmpgtd     xmm2, xmm0

        ; Pack both 16-bit byte masks into one register (xmm1 result in
        ; the high half) and keep one bit per lane:
        ;   eax = (mask1 << 4) | mask2, in the range 0..255
        pmovmskb    eax, xmm1
        pmovmskb    edx, xmm2
        shl         eax, 16
        or          eax, edx
        mov         edx, LANE_BITS_MASK
        pext        eax, eax, edx

        ; fetch the shuffle constant and patch it into the PSHUFD below
        movzx       eax, byte [pshufd_table + eax]
        mov         [pshufd_label_text + PSHUFD_IMM_OFFSET], al

        ; Branch to the freshly modified instruction before executing
        ; it, as Intel recommends for self-modifying code.
        jmp         short pshufd_label_text

pshufd_label_text:
        pshufd      xmm0, xmm0, 0xe0            ; <---- immediate is patched above
        movdqu      [ecx], xmm0
        ret