Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: Help! Visual C++ intrinsics! (Nalimov, are you around here somewhere

Author: Gerd Isenberg

Date: 03:55:36 10/02/03

Go up one level in this thread


I tried some SSE2 intrinsics with MSVC6 on a P4:

#include <stdio.h>
#include <emmintrin.h>

__m128i source =	// test input: sixteen char values 0..15, written as octal escapes
{
	'\000', '\001', '\002', '\003',	// values 0..3
	'\004', '\005', '\006', '\007',	// values 4..7
	'\010', '\011', '\012', '\013',	// octal escapes: values 8..11
	'\014', '\015', '\016', '\017'	// octal escapes: values 12..15
};
__m128i target;	// result buffer: written by sse2test(), printed by main()

void sse2test()	// target = source shifted left by one byte, via SSE2 intrinsics
{	// NOTE(review): the "; Line N" markers in the debug listing appear to refer to this file's line numbers - keep the line count unchanged
	__m128i reg1 = _mm_load_si128 (&source);	// 128-bit load of source (movdqa in both listings)
	reg1 =  _mm_slli_si128 (reg1, 1);	// shift the whole 128-bit value left by 1; the count is in bytes, not bits (pslldq)
	_mm_store_si128 (&target, reg1);	// 128-bit store of the shifted value into target (movdqa)

}

// Runs sse2test() once, then prints source and target as 128-bit hexadecimal
// numbers (most significant 32-bit word first) and waits for input.
// argc and argv are not used. Always returns 0.
int main(int argc, char* argv[])
{
	sse2test();
	// Each m128i_u32 element is 32 bits wide, so it needs eight hex digits.
	// With a minimum width of four, any word with a leading zero nibble
	// (e.g. 0x03020100) printed with fewer than eight digits, so the
	// concatenated 128-bit value was misaligned and ambiguous.
	printf("source = 0x%08x%08x%08x%08x\n",
                source.m128i_u32[3],
                source.m128i_u32[2],
                source.m128i_u32[1],
                source.m128i_u32[0] );
	printf("target = 0x%08x%08x%08x%08x\n",
                target.m128i_u32[3],
                target.m128i_u32[2],
                target.m128i_u32[1],
                target.m128i_u32[0] );
	// Block until a character is read (presumably to keep the console window open).
	getchar();
	return 0;
}

The generated assembly looks fine in the release version:

PUBLIC	?sse2test@@YAXXZ				; sse2test
; Function compile flags: /Ogty
;	COMDAT ?sse2test@@YAXXZ
_TEXT	SEGMENT
; Release build of sse2test(): copies source to target shifted left by one byte.
; No stack frame is set up; the only registers written are eax, ecx and xmm0.
; Each of the three intrinsics in the C++ source becomes a single instruction.
?sse2test@@YAXXZ PROC NEAR				; sse2test, COMDAT
	mov	eax, OFFSET FLAT:?source@@3T__m128i@@A	; source
	mov	ecx, OFFSET FLAT:?target@@3T__m128i@@A	; target
; _mm_load_si128: xmm0 = 128 bits at source (movdqa requires a 16-byte aligned address)
	movdqa	xmm0, XMMWORD PTR [eax]
; _mm_slli_si128: shift all of xmm0 left by 1 byte, filling with zero
	pslldq	xmm0, 1
; _mm_store_si128: write xmm0 to target (again an aligned access)
	movdqa	XMMWORD PTR [ecx], xmm0
	ret	0
?sse2test@@YAXXZ ENDP					; sse2test


But the debug version looks horrible:


PUBLIC	?sse2test@@YAXXZ				; sse2test
; Function compile flags: /Odt /GZ /ZI
;	COMDAT ?sse2test@@YAXXZ
_TEXT	SEGMENT
; Debug build of sse2test(): same result as the release build, but every
; intrinsic result goes through a stack temporary and the local reg1 lives in
; memory. The three 16-byte stack slots below are addressed relative to ebp,
; which the prologue forces onto a 16-byte boundary so that movdqa can be used.
_reg1$ = -16
$T1865 = -32
$T1866 = -48
?sse2test@@YAXXZ PROC NEAR				; sse2test, COMDAT
; Line 14
; --- prologue: build a 16-byte aligned frame ---
; ebx keeps the incoming stack pointer (after saving ebx) so the epilogue can
; restore esp no matter how far the alignment step moved it.
	push	ebx
	mov	ebx, esp
; Round esp down to a multiple of 16, then add 4 so that the following
; 4-byte push of ebp leaves esp 16-byte aligned again.
	sub	esp, 8
	and	esp, -16				; fffffff0H
	add	esp, 4
	push	ebp
; [ebx] holds the saved ebx, so [ebx+4] is the return address. It is copied to
; the slot just above the saved ebp, so the new frame has the usual
; (saved ebp, return address) layout - presumably for the debugger's stack walk.
	mov	ebp, DWORD PTR [ebx+4]
	mov	DWORD PTR [esp+4], ebp
; ebp = aligned frame base; ebp-16, ebp-32 and ebp-48 are therefore aligned too.
	mov	ebp, esp
; Reserve 120 bytes of locals and save esi/edi (edi is used by the fill below).
	sub	esp, 120				; 00000078H
	push	esi
	push	edi
; Fill the 120-byte local area (30 dwords) with 0CCCCCCCCh
; (presumably the uninitialized-memory pattern requested by /GZ).
	lea	edi, DWORD PTR [ebp-120]
	mov	ecx, 30					; 0000001eH
	mov	eax, -858993460				; ccccccccH
	rep stosd
; Line 15
; reg1 = _mm_load_si128(&source): load, spill to temporary $T1865, reload, store to reg1.
	mov	eax, OFFSET FLAT:?source@@3T__m128i@@A	; source
	movdqa	xmm0, XMMWORD PTR [eax]
	movdqa	XMMWORD PTR $T1865[ebp], xmm0
	movdqa	xmm0, XMMWORD PTR $T1865[ebp]
	movdqa	XMMWORD PTR _reg1$[ebp], xmm0
; Line 16
; reg1 = _mm_slli_si128(reg1, 1): reload reg1, shift left by 1 byte,
; spill to temporary $T1866, reload, store back to reg1.
	movdqa	xmm0, XMMWORD PTR _reg1$[ebp]
	pslldq	xmm0, 1
	movdqa	XMMWORD PTR $T1866[ebp], xmm0
	movdqa	xmm0, XMMWORD PTR $T1866[ebp]
	movdqa	XMMWORD PTR _reg1$[ebp], xmm0
; Line 17
; _mm_store_si128(&target, reg1): reload reg1 and store it to target.
	movdqa	xmm0, XMMWORD PTR _reg1$[ebp]
	mov	eax, OFFSET FLAT:?target@@3T__m128i@@A	; target
	movdqa	XMMWORD PTR [eax], xmm0
; Line 19
; --- epilogue: undo the prologue in reverse order ---
; Restore edi/esi, drop the locals, restore ebp, then use ebx to undo the
; alignment adjustment before restoring ebx itself.
	pop	edi
	pop	esi
	mov	esp, ebp
	pop	ebp
	mov	esp, ebx
	pop	ebx
	ret	0
?sse2test@@YAXXZ ENDP					; sse2test







This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.