Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: Nalimov: bsf/bsr intrinsics implementation still not optimal

Author: Gerd Isenberg

Date: 03:12:48 09/23/04

Go up one level in this thread


On September 22, 2004 at 13:55:46, Dezhi Zhao wrote:

>I'm  really happy that bit operation instructions have become intrinsics for VC
>compiler in VS 2003 and later.
>
>However the output asm code is still not optimal. It generates a pair of
>redundant memory-register save and load instrucions. I also tested VC 2005
>Express beta1. The same thing again....

Yes, such not neccesary store/loads seem a problem with
msc-intrinsics:

http://chessprogramming.org/cccsearch/ccc.php?art_id=357561
http://chessprogramming.org/cccsearch/ccc.php?art_id=357568
http://chessprogramming.org/cccsearch/ccc.php?art_id=357570

But in your case, based on the prototype of the instrinsic, i guess a boolean
return, but a pointer to index, the bsf-intrinsic is foreced to store the found
bitindex to memory.

Gerd


>
>Here is the asm output:
>
>PUBLIC	?testbsf@@YIHH@Z				; testbsf
>; Function compile flags: /Ogty
>; File c:\documents and settings\zhao\my documents\visual studio
>projects\bsf\bsf.cpp
>;	COMDAT ?testbsf@@YIHH@Z
>_TEXT	SEGMENT
>_index$9629 = -4					; size = 4
>?testbsf@@YIHH@Z PROC NEAR				; testbsf, COMDAT
>; _mask$ = ecx
>
>; 19   : {
>
>	push	ecx
>	push	esi
>	mov	esi, ecx
>
>; 20   : 	int m1;
>; 21   : 	int clone = 0;
>
>	xor	eax, eax
>
>; 22   :
>; 23   : 	while(mask)
>
>	test	esi, esi
>	je	SHORT $L9628
>	npad	6
>$L9627:
>
>; 24   : 	{
>; 25   : 		unsigned long index;
>; 26   : 		_BitScanForward(&index, mask);
>
>	bsf	edx, esi
>	lea	ecx, DWORD PTR _index$9629[esp+8]
>	mov	DWORD PTR [ecx], edx
>
>; 27   : 		m1 = 1 << index;
>
>	mov	ecx, DWORD PTR _index$9629[esp+8]
>	mov	edx, 1
>	shl	edx, cl
>
>; 28   : 		clone |= m1;
>
>	or	eax, edx
>
>; 29   : 		mask ^= m1;
>
>	xor	esi, edx
>	jne	SHORT $L9627
>$L9628:
>	pop	esi
>
>; 30   : 	}
>; 31   :
>; 32   : 	return clone;
>; 33   : }
>
>	pop	ecx
>	ret	0
>?testbsf@@YIHH@Z ENDP					; testbsf
>_TEXT	ENDS
>
>
>The test program:
>
>// bsf.cpp : Defines the entry point for the console application.
>//
>
>#include "stdafx.h"
>#include <iostream>
>using namespace std;
>
>extern "C"
>unsigned char _BitScanForward(unsigned long* index, unsigned long mask);
>
>#pragma intrinsic(_BitScanForward)
>
>int testbsf(int mask)
>{
>	int m1;
>	int clone = 0;
>
>	while(mask)
>	{
>		unsigned long index;
>		_BitScanForward(&index, mask);
>		m1 = 1 << index;
>		clone |= m1;
>		mask ^= m1;
>	}
>
>	return clone;
>}
>
>int _tmain(int argc, _TCHAR* argv[])
>{
>   unsigned long mask = 0x1000;
>
>   cout << "Enter a positive integer as the mask: " << flush;
>   cin >> mask;
>   cout << "Mask: " << mask << " Clone: " << testbsf(mask) << endl;
>
>   return 0;
>}



This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.