Author: Gerd Isenberg
Date: 03:55:36 10/02/03
Go up one level in this thread
I tried some SSE2 intrinsics with MSVC6 on a P4:
#include <stdio.h>
#include <emmintrin.h>
/* Test input: sixteen bytes holding the values 0..15 in initializer order.
   NOTE(review): a 16-element byte initializer for __m128i is presumably
   MSVC-specific -- confirm before building with another compiler. */
__m128i source =
{
     0,  1,  2,  3,
     4,  5,  6,  7,
     8,  9, 10, 11,
    12, 13, 14, 15
};

/* Destination vector; written by sse2test(). */
__m128i target;
/* Loads `source`, shifts the whole 128-bit value left by one byte
   (_mm_slli_si128 with a count of 1) and stores the result to `target`.
   Both accesses use the aligned load/store intrinsics. */
void sse2test()
{
    const __m128i shifted = _mm_slli_si128(_mm_load_si128(&source), 1);

    _mm_store_si128(&target, shifted);
}
/*
 * Runs sse2test() and prints `source` and `target` as 128-bit hexadecimal
 * numbers, most significant 32-bit element first, then waits for a key
 * press (getchar) before returning 0.
 *
 * argc and argv are not used.
 */
int main(int argc, char* argv[])
{
    sse2test();

    /* Each m128i_u32 element is 32 bits wide, so it must be padded to 8 hex
       digits. With a width of 4, any element below 0x10000000 loses its
       leading zeros (0x03020100 would print as "3020100"), which misaligns
       the concatenated 128-bit dump. */
    printf("source = 0x%08x%08x%08x%08x\n",
           source.m128i_u32[3],
           source.m128i_u32[2],
           source.m128i_u32[1],
           source.m128i_u32[0]);
    printf("target = 0x%08x%08x%08x%08x\n",
           target.m128i_u32[3],
           target.m128i_u32[2],
           target.m128i_u32[1],
           target.m128i_u32[0]);

    getchar(); /* keep the console window open until a key is pressed */
    return 0;
}
The generated assembly looks fine in the release build:
PUBLIC ?sse2test@@YAXXZ ; sse2test
; Function compile flags: /Ogty
; COMDAT ?sse2test@@YAXXZ
_TEXT SEGMENT
; void sse2test() -- optimized build (MASM / Intel syntax, 32-bit code).
; No stack frame is set up: the vector is loaded from `source`, shifted and
; stored to `target` entirely through xmm0.
; Registers written: eax, ecx, xmm0.
; movdqa is the aligned 128-bit move, so both globals must be 16-byte aligned.
?sse2test@@YAXXZ PROC NEAR ; sse2test, COMDAT
mov eax, OFFSET FLAT:?source@@3T__m128i@@A ; source
mov ecx, OFFSET FLAT:?target@@3T__m128i@@A ; target
movdqa xmm0, XMMWORD PTR [eax] ; xmm0 = source
pslldq xmm0, 1 ; shift all 128 bits left by one byte (_mm_slli_si128)
movdqa XMMWORD PTR [ecx], xmm0 ; target = xmm0
ret 0
?sse2test@@YAXXZ ENDP ; sse2test
But the debug build looks horrible:
PUBLIC ?sse2test@@YAXXZ ; sse2test
; Function compile flags: /Odt /GZ /ZI
; COMDAT ?sse2test@@YAXXZ
_TEXT SEGMENT
; ebp-relative offsets of the three 16-byte stack slots used below.
_reg1$ = -16
$T1865 = -32
$T1866 = -48
; void sse2test() -- unoptimized build (MASM / Intel syntax, 32-bit code).
; Same work as the three intrinsic calls in the C source, but every
; intermediate value is spilled to a stack slot and reloaded.
; ebx, ebp, esi and edi are saved and restored; eax, ecx and xmm0 are written.
?sse2test@@YAXXZ PROC NEAR ; sse2test, COMDAT
; Line 14
; Prologue: build a frame whose ebp is 16-byte aligned, so that the movdqa
; accesses to the ebp-relative slots (-16, -32, -48) are aligned.
push ebx
mov ebx, esp ; ebx remembers the incoming stack: [ebx] = saved ebx, [ebx+4] = return address
sub esp, 8
and esp, -16 ; fffffff0H
add esp, 4 ; esp is now 16n+4, so the push below leaves esp at a multiple of 16
push ebp
mov ebp, DWORD PTR [ebx+4] ; fetch the return address ...
mov DWORD PTR [esp+4], ebp ; ... and copy it just above the saved ebp (presumably so the frame looks conventional to a debugger -- unconfirmed)
mov ebp, esp ; ebp = 16-byte aligned frame pointer
sub esp, 120 ; 00000078H
push esi
push edi
; Fill the 120 bytes (30 dwords) of locals at [ebp-120, ebp) with 0xCC
; (presumably the /GZ debug fill pattern -- unconfirmed).
lea edi, DWORD PTR [ebp-120]
mov ecx, 30 ; 0000001eH
mov eax, -858993460 ; ccccccccH
rep stosd
; Line 15
; reg1 = _mm_load_si128(&source), routed through temporary $T1865.
mov eax, OFFSET FLAT:?source@@3T__m128i@@A ; source
movdqa xmm0, XMMWORD PTR [eax]
movdqa XMMWORD PTR $T1865[ebp], xmm0
movdqa xmm0, XMMWORD PTR $T1865[ebp]
movdqa XMMWORD PTR _reg1$[ebp], xmm0
; Line 16
; reg1 = _mm_slli_si128(reg1, 1), routed through temporary $T1866.
movdqa xmm0, XMMWORD PTR _reg1$[ebp]
pslldq xmm0, 1 ; shift all 128 bits left by one byte
movdqa XMMWORD PTR $T1866[ebp], xmm0
movdqa xmm0, XMMWORD PTR $T1866[ebp]
movdqa XMMWORD PTR _reg1$[ebp], xmm0
; Line 17
; _mm_store_si128(&target, reg1)
movdqa xmm0, XMMWORD PTR _reg1$[ebp]
mov eax, OFFSET FLAT:?target@@3T__m128i@@A ; target
movdqa XMMWORD PTR [eax], xmm0
; Line 19
; Epilogue: undo the prologue in reverse order.
pop edi
pop esi
mov esp, ebp ; discard the locals
pop ebp
mov esp, ebx ; back to the stack pointer saved in ebx at entry
pop ebx
ret 0
?sse2test@@YAXXZ ENDP ; sse2test
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.