Author: martin fierz
Date: 14:24:53 02/15/02
Go up one level in this thread
On February 14, 2002 at 17:17:18, Eugene Nalimov wrote:
>Is it possible that by removing some code from your program you speed up the
>remaining code? E.g. better cache locality, less information for branch
>predictor to store (less conflicts), etc.?
>
>Eugene
forgive me for posting my ugly and probably inefficient code :-)
but maybe you see something in it which tells you about your question:
LSB, MSB and bitcount are implemented as table-lookups MSB and LSB in bytes,
bitcount in words. my guess is that the profiler is inaccurate because
positiontoindex is using such subroutine calls, whereas indextoposition is not.
do you find anything in this code to answer your question?
aloha
martin
int MSB(int32 x)
{
/* returns the position of the most significant bit in x */
/* or -1 if not found */
if(x&0xFF000000)
return(MSBarray[(x>>24)&0xFF]+24);
if(x&0x00FF0000)
return(MSBarray[(x>>16)&0xFF]+16);
if(x&0x0000FF00)
return(MSBarray[(x>>8)&0xFF]+8);
return(MSBarray[x&0xFF]);
}
extern int bicoef[33][33];
extern subdb subdatabase[MAXPIECES][MAXPIECES][MAXPIECES][MAXPIECES][7][7];
void positiontoindex(position *p, int32 *index, int color)
{
int bm, bk, wm, wk;
int bmrank=0;
int wmrank=0;
int i;
int32 x,y;
int32 bmindex=0,bkindex=0,wmindex=0,wkindex=0;
int32 bmrange=1, wmrange=1, bkrange=1;
int32 posindex=0;
/*
WHITE
28 29 30 31
24 25 26 27
20 21 22 23
16 17 18 19
12 13 14 15
8 9 10 11
4 5 6 7
0 1 2 3
BLACK
*/
// set bm, bk, wm, wk, and ranks
bm = bitcount(p->bm);
if(bm)
bmrank = MSB(p->bm)/4;
bk = bitcount(p->bk);
wm = bitcount(p->wm);
if(wm)
wmrank = (31-LSB(p->wm))/4;
wk = bitcount(p->wk);
// first, we set the index for the black men:
i=1;
y=p->bm;
while(y)
{
x=LSB(y);
y=y^(1<<x);
bmindex+=bicoef[x][i];
i++;
}
// next, we set it for the white men, disregarding black men:
i=1;
y=p->wm;
while(y)
{
x=MSB(y);
y=y^(1<<x);
x=31-x;
wmindex+=bicoef[x][i];
i++;
}
// then, black kings. this time, we include interfering black and white men.
i=1;
y=p->bk;
while(y)
{
x=LSB(y);
y=y^(1<<x);
// next line is the count of men on squares 0....x-1, as x-1 of a
0000010000000 number is 0000000111111111
x-=bitcount((p->bm|p->wm)&((1<<x)-1));
bkindex+=bicoef[x][i];
i++;
}
// last, white kings, with interfering other pieces
i=1;
y=p->wk;
while(y)
{
x=LSB(y);
y^=(1<<x);
x-=bitcount((p->bm|p->bk|p->wm) & ( (1<<x)-1 ) );
wkindex+=bicoef[x][i];
i++;
}
if(bm)
bmrange = bicoef[4*(bmrank+1)][bm] - bicoef[4*bmrank][bm];
if(wm)
wmrange = bicoef[4*(wmrank+1)][wm] - bicoef[4*wmrank][wm];
if(bk)
bkrange = bicoef[32-bm-wm][bk];
if(bmrank)
bmindex -= bicoef[4*bmrank][bm];
if(wmrank)
wmindex -= bicoef[4*wmrank][wm];
posindex = bmindex + wmindex*bmrange + bkindex*bmrange*wmrange +
wkindex*bmrange*wmrange*bkrange;
if(color==WHITE)
posindex+=subdatabase[bm][bk][wm][wk][bmrank][wmrank].databasesize/2;
*index = posindex;
}
void indextoposition(int32 index, position *p, int bm, int bk, int wm, int wk,
int bmrank, int wmrank, int *color)
{
// reverse the position to index thing
int halfsize;
int32 bmindex=0, bkindex=0, wmindex=0, wkindex=0;
int32 bmrange=1, wmrange=1, bkrange=1;
int multiplier;
int32 square_one=1;
int32 occupied;
int i,j,k,f;
int bkpos[MAXPIECES],wkpos[MAXPIECES];
p->bm = 0;
p->bk = 0;
p->wm = 0;
p->wk = 0;
halfsize = subdatabase[bm][bk][wm][wk][bmrank][wmrank].databasesize/2;
*color = BLACK;
if(index >= halfsize)
{
*color = WHITE;
index -= halfsize;
}
// extract the indexes for the piece types from the total index:
// get multiplier for wkindex:
if(bm)
bmrange = bicoef[4*(bmrank+1)][bm] - bicoef[4*bmrank][bm];
if(wm)
wmrange = bicoef[4*(wmrank+1)][wm] - bicoef[4*wmrank][wm];
if(bk)
bkrange = bicoef[32-bm-wm][bk];
multiplier = bmrange*wmrange*bkrange;
wkindex = index / multiplier;
index -= wkindex*multiplier;
multiplier = bmrange*wmrange;
bkindex = index / multiplier;
index -= bkindex*multiplier;
wmindex = index / bmrange;
index -= wmindex*bmrange;
bmindex = index;
// add the rank index
if(bm)
bmindex += bicoef[4*bmrank][bm];
if(wm)
wmindex += bicoef[4*wmrank][wm];
// now that we know the index numbers, we extract the pieces
// extract black men directly
i=27;
j=bm;
while(j)
{
while(bicoef[i][j]>bmindex)
i--;
bmindex-=bicoef[i][j];
p->bm |= square_one<<i;
j--;
}
// extract white men directly
i=27;
j=wm;
while(j)
{
while(bicoef[i][j]>wmindex)
i--;
wmindex-=bicoef[i][j];
p->wm |= square_one<<(31-i);
j--;
}
// extract positions of black kings
i=31;
for(j=bk;j>0;j--)
{
while(bicoef[i][j]>bkindex)
i--;
bkindex-=bicoef[i][j];
bkpos[j-1]=i;
}
// extract positions of white kings
i=31;
for(j=wk;j>0;j--)
{
while(bicoef[i][j]>wkindex)
i--;
wkindex-=bicoef[i][j];
wkpos[j-1]=i;
}
// now, put black kings on the board. we know: bkpos[0]...bkpos[bk-1] is
ordered
// with bkpos[0] being the smallest one, same goes for wkpos;
occupied = p->bm|p->wm;
k=0;
f=0;
for(i=0;i<32,k<bk;i++)
{
if(occupied & (square_one<<i))
continue;
if(bkpos[k] == f)
{
p->bk|=(square_one<<i);
k++;
}
f++;
}
occupied = p->bm|p->wm|p->bk;
k=0;
f=0;
for(i=0;i<32,k<wk;i++)
{
if(occupied & (square_one<<i))
continue;
if(wkpos[k] == f)
{
p->wk|=(square_one<<i);
k++;
}
f++;
}
}
>On February 14, 2002 at 16:54:32, martin fierz wrote:
>
>>On February 14, 2002 at 02:42:53, Artem Pyatakov wrote:
>>
>>>Martin,
>>>
>>>I have used profiles successfully MANY times to find out which specific areas of
>>>the program need optimization. What do you mean by "it just does not work"?
>>>Also, kNPS does not tell you what is making the program slow - profiles often
>>>do.
>>>
>>>Artem
>>
>>i mean what i say - the profiler is interfering so much with your program, that
>>you cannot trust what it says. i recently programmed an endgame database builder
>>for checkers and wanted to know if the positiontoindex or the indextoposition
>>function is faster. i asked the profiler which told me that indextoposition is 8
>>times faster.
>>this profile was generated in a loop
>>
>>for(i=0;i<dbsize;i++)
>> {
>> indextoposition(...)
>> positiontoindex(...)
>> }
>>
>>the time to run this program without profiling was 17 seconds. when i took out
>>the positiontoindex function of the loop, it took 13 seconds to run. since both
>>functions are rather complex, and since there are no common parts of the two
>>functions, it is clear that in fact, in the real program, positiontoindex is 3
>>times faster. this is off by a factor of 24 compared to what the profiler says.
>>since the second method is clearly measuring actual performance of my program, i
>>have to conclude that the profiler is dead wrong.
>>i guess it has something to do with the fact that positiontoindex calls some
>>small subroutines, while indextoposition does not. this makes the
>>positiontoindex routine seem slower in the profiling process than it actually is
>>since the profiler tries to profile the subroutines too. just a guess...
>>
>>aloha
>> martin
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.