Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: Nalimov: bsf/bsr intrinsics implementation still not optimal

Author: Dezhi Zhao

Date: 05:31:33 09/23/04

Go up one level in this thread


On September 23, 2004 at 06:12:48, Gerd Isenberg wrote:

>On September 22, 2004 at 13:55:46, Dezhi Zhao wrote:
>
>>I'm  really happy that bit operation instructions have become intrinsics for VC
>>compiler in VS 2003 and later.
>>
>>However the output asm code is still not optimal. It generates a pair of
>>redundant memory-register save and load instrucions. I also tested VC 2005
>>Express beta1. The same thing again....
>
>Yes, such not neccesary store/loads seem a problem with
>msc-intrinsics:
>
>http://chessprogramming.org/cccsearch/ccc.php?art_id=357561
>http://chessprogramming.org/cccsearch/ccc.php?art_id=357568
>http://chessprogramming.org/cccsearch/ccc.php?art_id=357570
>
>But in your case, based on the prototype of the instrinsic, i guess a boolean
>return, but a pointer to index, the bsf-intrinsic is foreced to store the found
>bitindex to memory.
>
>Gerd
>

You're probally right that the prototype fools the compiler. However I think the
complier still has good oppotunities to eliminate those store/load instructions
when it comes to optimize the code. Now look at the prototype again:

unsigned char _BitScanForward(unsigned long* index, unsigned long mask);

It returns a boolean value. This is to handle the case where mask == 0.
I don't think BitScanForward needs such exception handler. I like this better:

long _BitScanForward(long mask);

But you need to add a warning in the documentation that the return value is
undefined if mask == 0 just as the one you can find in bsf/bsr documentation.

I'm sure if it were of the simplier proptype, the current compiler would not
generate the redundant store/load instructions.

Zhao

>
>>
>>Here is the asm output:
>>
>>PUBLIC	?testbsf@@YIHH@Z				; testbsf
>>; Function compile flags: /Ogty
>>; File c:\documents and settings\zhao\my documents\visual studio
>>projects\bsf\bsf.cpp
>>;	COMDAT ?testbsf@@YIHH@Z
>>_TEXT	SEGMENT
>>_index$9629 = -4					; size = 4
>>?testbsf@@YIHH@Z PROC NEAR				; testbsf, COMDAT
>>; _mask$ = ecx
>>
>>; 19   : {
>>
>>	push	ecx
>>	push	esi
>>	mov	esi, ecx
>>
>>; 20   : 	int m1;
>>; 21   : 	int clone = 0;
>>
>>	xor	eax, eax
>>
>>; 22   :
>>; 23   : 	while(mask)
>>
>>	test	esi, esi
>>	je	SHORT $L9628
>>	npad	6
>>$L9627:
>>
>>; 24   : 	{
>>; 25   : 		unsigned long index;
>>; 26   : 		_BitScanForward(&index, mask);
>>
>>	bsf	edx, esi
>>	lea	ecx, DWORD PTR _index$9629[esp+8]
>>	mov	DWORD PTR [ecx], edx
>>
>>; 27   : 		m1 = 1 << index;
>>
>>	mov	ecx, DWORD PTR _index$9629[esp+8]
>>	mov	edx, 1
>>	shl	edx, cl
>>
>>; 28   : 		clone |= m1;
>>
>>	or	eax, edx
>>
>>; 29   : 		mask ^= m1;
>>
>>	xor	esi, edx
>>	jne	SHORT $L9627
>>$L9628:
>>	pop	esi
>>
>>; 30   : 	}
>>; 31   :
>>; 32   : 	return clone;
>>; 33   : }
>>
>>	pop	ecx
>>	ret	0
>>?testbsf@@YIHH@Z ENDP					; testbsf
>>_TEXT	ENDS
>>
>>
>>The test program:
>>
>>// bsf.cpp : Defines the entry point for the console application.
>>//
>>
>>#include "stdafx.h"
>>#include <iostream>
>>using namespace std;
>>
>>extern "C"
>>unsigned char _BitScanForward(unsigned long* index, unsigned long mask);
>>
>>#pragma intrinsic(_BitScanForward)
>>
>>int testbsf(int mask)
>>{
>>	int m1;
>>	int clone = 0;
>>
>>	while(mask)
>>	{
>>		unsigned long index;
>>		_BitScanForward(&index, mask);
>>		m1 = 1 << index;
>>		clone |= m1;
>>		mask ^= m1;
>>	}
>>
>>	return clone;
>>}
>>
>>int _tmain(int argc, _TCHAR* argv[])
>>{
>>   unsigned long mask = 0x1000;
>>
>>   cout << "Enter a positive integer as the mask: " << flush;
>>   cin >> mask;
>>   cout << "Mask: " << mask << " Clone: " << testbsf(mask) << endl;
>>
>>   return 0;
>>}



This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.