Author: Anthony Cozzie
Date: 14:14:20 05/05/04
I experimented with attack tables on the recommendation of Vincent last fall.
The following code is based on his move generator. I eventually decided that on
32 bit machines, attack tables were a clear win over bitboards, but at 64 bits
it would be about a break-even deal, and since I already had a bitboard engine,
the choice was easy ;)
anthony
---------------------------------
#include "zappa.h"
//Since Zappa spends a lot of time updating the attack tables,
//I try to keep track of exactly what its doing.
//#define ATTACK_TABLE_PROFILING
#ifdef ATTACK_TABLE_PROFILING
#define ATTACK_TABLE_PROFILE(A) A
zappa_u64 at_sweep_calls = 0;
zappa_u64 at_fixed_calls = 0;
zappa_u64 at_xray_calls = 0;
zappa_u64 at_sweep_squares = 0;
zappa_u64 at_fixed_squares = 0;
zappa_u64 at_xray_squares = 0;
#else
#define ATTACK_TABLE_PROFILE(A)
#endif
#define SCAN_FSM_STATE_NOX 0
#define SCAN_FSM_STATE_MX 1
#define SCAN_FSM_STATE_PX 2
#define SCAN_FSM_STATE_QX 3
#define SCAN_FSM_STATE_QPX 4
//Add a piece to the attack tables
static zforceinline void add_to_attack_table_wpawn(zpos * RESTRICT p, int id,
int sq);
static zforceinline void add_to_attack_table_bpawn(zpos * RESTRICT p, int id,
int sq);
static zforceinline void add_to_attack_table_white_sweep(zpos * RESTRICT p, int
id, int ptype, int sq);
static zforceinline void add_to_attack_table_black_sweep(zpos * RESTRICT p, int
id, int ptype, int sq);
static zforceinline void add_to_attack_table_fixed(zpos * RESTRICT p, zappa_u32
* RESTRICT tbl, int id, int ptype, int sq);
//Remove a piece from the attack tables
static zforceinline void remove_from_attack_table_wpawn(zpos * RESTRICT p, int
id, int sq);
static zforceinline void remove_from_attack_table_bpawn(zpos * RESTRICT p, int
id, int sq);
static zforceinline void remove_from_attack_table_white_sweep(zpos * RESTRICT p,
int id, int ptype, int sq);
static zforceinline void remove_from_attack_table_black_sweep(zpos * RESTRICT p,
int id, int ptype, int sq);
static zforceinline void remove_from_attack_table_fixed(zpos * RESTRICT p,
zappa_u32 * RESTRICT tbl, int id, int ptype, int sq);
//Update xray attacks - the really annoying part, in terms of complexity
static zforceinline void add_to_attack_table_xray(zpos * RESTRICT p, zappa_u32 *
RESTRICT tbl, int id, int ptype, int sq, int xsq);
static zforceinline void remove_from_attack_table_xray(zpos * RESTRICT p,
zappa_u32 * RESTRICT tbl, int id, int ptype, int sq, int xsq);
const zappa_s8 rot90[64] = {0, 8,16,24,32,40,48,56,
1, 9,17,25,33,41,49,57,
2,10,18,26,34,42,50,58,
3,11,19,27,35,43,51,59,
4,12,20,28,36,44,52,60,
5,13,21,29,37,45,53,61,
6,14,22,30,38,46,54,62,
7,15,23,31,39,47,55,63};
//Square A in rot-space is at square h1a8rot[A] in flat-space
const zappa_s8 roth1a8[64] = {H1, G2, F3, E4, D5, C6, B7, A8,
H2, G3, F4, E5, D6, C7, B8, A1,
H3, G4, F5, E6, D7, C8, B1, A2,
H4, G5, F6, E7, D8, C1, B2, A3,
H5, G6, F7, E8, D1, C2, B3, A4,
H6, G7, F8, E1, D2, C3, B4, A5,
H7, G8, F1, E2, D3, C4, B5, A6,
H8, G1, F2, E3, D4, C5, B6, A7};
//Square A in flatspace is at square roth1a8[A] in rot-space
const zappa_s8 inv_roth1a8[64] = {H2, G3, F4, E5, D6, C7, B8, A1,
H3, G4, F5, E6, D7, C8, B1, A2,
H4, G5, F6, E7, D8, C1, B2, A3,
H5, G6, F7, E8, D1, C2, B3, A4,
H6, G7, F8, E1, D2, C3, B4, A5,
H7, G8, F1, E2, D3, C4, B5, A6,
H8, G1, F2, E3, D4, C5, B6, A7,
H1, G2, F3, E4, D5, C6, B7, A8};
//Square A in rot-space is at square a1h8rot[A] in flat-space
const zappa_s8 rota1h8[64] = {A1, B2, C3, D4, E5, F6, G7, H8,
B1, C2, D3, E4, F5, G6, H7, A8,
C1, D2, E3, F4, G5, H6, A7, B8,
D1, E2, F3, G4, H5, A6, B7, C8,
E1, F2, G3, H4, A5, B6, C7, D8,
F1, G2, H3, A4, B5, C6, D7, E8,
G1, H2, A3, B4, C5, D6, E7, F8,
H1, A2, B3, C4, D5, E6, F7, G8};
//Square A in flatspace is at square roth1a8[A] in rot-space
const zappa_s8 inv_rota1h8[64] = {A1, A2, A3, A4, A5, A6, A7, A8,
B8, B1, B2, B3, B4, B5, B6, B7,
C7, C8, C1, C2, C3, C4, C5, C6,
D6, D7, D8, D1, D2, D3, D4, D5,
E5, E6, E7, E8, E1, E2, E3, E4,
F4, F5, F6, F7, F8, F1, F2, F3,
G3, G4, G5, G6, G7, G8, G1, G2,
H2, H3, H4, H5, H6, H7, H8, H1};
//Precomputed move masks for rotated bitboards
bitboard rr_precomp[64][64]; //rook ranks: 8*64*64 = 32kB memory
bitboard rf_precomp[64][64]; //rook files: 8*64*64 = 32kB memory
bitboard ba_precomp[64][64]; //bishop a1->h8 diagonals: 8*64*64 = 32kB memory
bitboard bh_precomp[64][64]; //bishop h1->a8 diagonals: 8*64*64 = 32kB memory
//Precomputed move masks for kings, pawns, and knights
bitboard k_precomp[64]; //king moves: 8*64 = 512B
memory
bitboard n_precomp[64]; //knight moves: 8*64 = 512B
memory
bitboard p_rightmask; //rightmask contains all the squares not on the
A file
bitboard p_leftmask; //leftmask contains all the squares not on the
H file
bitboard p_allmask;
bitboard second_rank; //Squares on the second rank
bitboard seventh_rank; //Squares on the seventh rank
//given an array of 'delta x squares' and 'delta y squares' make a bitboard
//with ones on those squares and zeroes everywhere else
//also do not allow 'board wraps'
//invariant: r is of size 8
//called by: fill_king_precomputes(), fill_knight_precomputes()
static zforceinline bitboard get_precomp_move_mask(const int d_x[], const int
d_y[], int sq)
{
register bitboard v, bb_one;
register int i, x, y;
for(i = 0, v = 0, bb_one = 1; i < 8; i++)
{
x = (sq & 7) + d_x[i];
y = ((sq & 56) >> 3) + d_y[i];
if((x >= 0) & (x <= 7) & (y >= 0) & (y <= 7))
v |= bb_one << (sq + d_x[i] + 8 * d_y[i]);
}
return v;
}
//initialize lookup tables
void init_move_generation(void)
{
//constants for knight/king attack bitboards
const static int d_nx[8] = {-2,-1, 1, 2, 2, 1,-1,-2}, d_ny[8] = {-1,-2,-2,-1,
1, 2, 2, 1};
const static int d_kx[8] = {-1, 0, 1, 1, 1, 0,-1,-1}, d_ky[8] = {-1,-1,-1, 0,
1, 1, 1, 0};
int i, j, r, k, rs_index, extfill, hoff, aoff, pcomp, mcomp, rcomp, qcomp,
ptype, uptype, fsm_state, fsm_xstate;
int fsm_stop_state, fsm_type_state, xpawn, ray_xtype;
int barrier, xpiece, nfsm_xastate, nfsm_xrstate;
int nfsm_type_state, nfsm_stop_state, xminor, g;
int mgen_iter, sgen_iter, sq, x, y; //iterators
const int directions[3][9] = {{7, -7, 9, -9, 0}, //slider delta types (rooks
can move n*1, etc)
{1, -1, 8, -8, 0},
{1, -1, 8, -8, 7, -7, 9, -9, 0}};
const int dx[2][8] = {{2, 1, -1, -2, -2, -1, 1, 2}, {1, 1, 0, -1, -1, -1, 0,
1}};
const int dy[2][8] = {{1, 2, 2, 1, -1, -2, -2, -1}, {0, 1, 1, 1, 0, -1, -1,
-1}};
//Initialize edge table
for(i = 0; i < 64; i++)
edge_table[i] = (((i & 7) == 0) << 3) + (((i & 7) == 7) << 2) + (((i >> 3)
== 7) << 1) +(((i >> 3) == 0) << 0);
//xray piece lists
for(i = 0; i < 8; i++)
{
g = uncompress_gradient[i];
for(j = 0; j < 64; j++)
{
k = 0;
for(sq = j; (edge_table[sq] & scan_edge_table[g+9]) == 0; sq += g)
xray_slider_move_table[i][j][k++] = (ray_type_table[i] << 6) + sq + g;
xray_slider_move_table[i][j][int_max(k-1, 0)] |= 0xC0; //EOR
}
}
//sliders
for(i = 0; i < 3; i++) //loop over piece types
{
for(j = 0; j < 64; j++) //loop over initial squares
{
for(k = 0; k < 29; k++) //initialize (-1 -> end of list)
mgen_slider_move_table[i][j][k] = -1;
sgen_iter = mgen_iter = 0;
for(k = 0; directions[i][k] != 0; k++) //loop over ray
{
for(sq = j; 1;)
{
//edge test: top
if((sq >> 3) == 7 && directions[i][k] > 1)
break;
//edge test: bottom
if((sq >> 3) == 0 && directions[i][k] < -1)
break;
//edge test: right
if((sq & 7) == 7 && (directions[i][k] == -7 || directions[i][k] == 9 ||
directions[i][k] == 1))
break;
//edge test: left
if((sq & 7) == 0 && (directions[i][k] == 7 || directions[i][k] == -9 ||
directions[i][k] == -1))
break;
sq += directions[i][k]; //proceed along the ray
mgen_slider_move_table[i][j][mgen_iter++] = sq; //add to our list of squares
}
//0: xray White Pawns, diagonal sliders
//1: xray Black Pawns, diagonal sliders
//2: xray horizontal sliders
//3: End of ray.
if(abs(directions[i][k]) % 2 == 1 && abs(directions[i][k]) > 1)
ray_xtype = directions[i][k] > 0;
else
ray_xtype = 2;
//for all the square so far: if we terminate early, go to the next ray
for(; sgen_iter < mgen_iter; sgen_iter++)
mgen_slider_move_table[i][j][sgen_iter+32] = (((mgen_iter - sgen_iter - 1 ==
0) ? 3 : ray_xtype) << 4) | (mgen_iter - sgen_iter - 1);
}
}
}
//fixed pieces
for(i = 0; i < 2; i++)
{
for(j = 0; j < 64; j++)
{
for(k = 0; k < 16; k++)
mgen_fixed_move_table[i][j][k] = -1;
for(mgen_iter = k = 0; k < 8; k++)
{
x = File(j) + dx[i][k];
y = Rank(j) + dy[i][k];
if(x >= 0 && x <= 7 && y >= 0 && y <= 7)
mgen_fixed_move_table[i][j][mgen_iter++] = (y << 3) | x;
}
}
}
//precompute the castling move for easy comparison / concise code
wk_castle_move = m_build(0, 0, WKING, E1, G1);
wq_castle_move = m_build(0, 0, WKING, E1, C1);
bk_castle_move = m_build(0, 0, BKING, E8, G8);
bq_castle_move = m_build(0, 0, BKING, E8, C8);
//non xray state machine transistions and output
for(i = 0; i < 320; i++)
{
uptype = (ptype = i & 0xF) >> 1;
ray_xtype = (i >> 4) & 0x3;
fsm_state = i >> 6;
//are we done with this ray?
switch(ray_xtype)
{
case 0: barrier = (ptype == WPAWN) || (uptype == KNIGHT) || (uptype == KING)
|| (uptype == ROOK); break;
case 1: barrier = (ptype == BPAWN) || (uptype == KNIGHT) || (uptype == KING)
|| (uptype == ROOK); break;
case 2: barrier = (uptype == PAWN) || (uptype == KNIGHT) || (uptype == KING)
|| (uptype == BISHOP); break;
case 3: barrier = 1; break;
}
barrier |= (fsm_state == SCAN_FSM_STATE_PX) || (fsm_state ==
SCAN_FSM_STATE_QPX);
//skip decision
if(barrier)
at_scan_skip_decision[i] = 0x0F; //skip
else
at_scan_skip_decision[i] = 0x00; //continue
//bc decision
if(barrier)
at_scan_bc_decision[i] = SCAN_FSM_STATE_NOX;
else if(uptype == QUEEN)
at_scan_bc_decision[i] = SCAN_FSM_STATE_QX;
else if(uptype == PAWN && fsm_state < SCAN_FSM_STATE_QX)
at_scan_bc_decision[i] = SCAN_FSM_STATE_PX;
else if(uptype == PAWN && fsm_state >= SCAN_FSM_STATE_QX)
at_scan_bc_decision[i] = SCAN_FSM_STATE_QPX;
else if(ptype > 0 && fsm_state < SCAN_FSM_STATE_QX)
at_scan_bc_decision[i] = SCAN_FSM_STATE_MX;
else
at_scan_bc_decision[i] = fsm_state;
}
//xray xsquare state machine transistion
for(i = 0; i < 256; i++)
{
//uncompress
uptype = (ptype = i & 0xF) >> 1;
ray_xtype = (i >> 4) & 0x3;
fsm_type_state = (i >> 6) & 0x1; //M/Q
fsm_xstate = (i >> 7) & 0x1;
xminor = (ray_xtype < 2 && uptype == BISHOP) || (ray_xtype >= 2 && uptype ==
ROOK);
xpawn = (ray_xtype == 0 && ptype == BPAWN) || (ray_xtype == 1 && ptype ==
WPAWN);
//First transistion: is the piece we are adding/subtracting a slider?
if(xminor) nfsm_type_state = 3;
//0
else if(fsm_type_state == 0 && uptype == QUEEN) nfsm_type_state = 2;
//M-Q
else if(fsm_type_state == 1 && uptype == QUEEN) nfsm_type_state = 3;
//0
else if(fsm_type_state == 0 && xpawn) nfsm_type_state = 4;
//M-P
else if(fsm_type_state == 1 && xpawn) nfsm_type_state = 5;
//Q-P
else nfsm_type_state =
fsm_type_state; //M/Q
//stop state is always 0
nfsm_stop_state = 0;
//xstate: insertion is simply "do we xray", without caring what is on the
square
nfsm_xastate = fsm_xstate;
//Removal: we do count the piece on the xsquare
nfsm_xrstate = !xpawn && !xminor && uptype != QUEEN;
//build and store
at_xray_add_xsquare[i] = (nfsm_type_state << 2) | (nfsm_stop_state << 1) |
(nfsm_xastate);
at_xray_rem_xsquare[i] = (nfsm_type_state << 2) | (nfsm_stop_state << 1) |
(nfsm_xrstate);
}
//xray state machine transistions
for(i = 0; i < 1536; i++)
{
//uncompress
uptype = (ptype = i & 0xF) >> 1;
ray_xtype = (i >> 4) & 0x3;
fsm_xstate = (i >> 6) & 0x1;
fsm_stop_state = (i >> 7) & 0x1;
fsm_type_state = (i >> 8);
//translate info into things that actually cause state changes
//do we stop here?
switch(ray_xtype)
{
case 0: barrier = (ptype == WPAWN) || (uptype == KNIGHT) || (uptype == KING)
|| (uptype == ROOK); break;
case 1: barrier = (ptype == BPAWN) || (uptype == KNIGHT) || (uptype == KING)
|| (uptype == ROOK); break;
case 2: barrier = (uptype == PAWN) || (uptype == KNIGHT) || (uptype == KING)
|| (uptype == BISHOP); break;
case 3: barrier = 1; break;
}
barrier |= fsm_stop_state == 1; //also stop if we saw a pawn previously.
//xray pawn?
switch(ray_xtype)
{
case 0: xpawn = (ptype == BPAWN); break;
case 1: xpawn = (ptype == WPAWN); break;
case 2:
case 3: xpawn = 0; break;
}
//will we scan through this piece
xpiece = ptype != EMPTY && !barrier;
//Input: 4 bits ptype, 2 bits decision.
//FSM State: 1 bit xray, 1 bit stop_next, 6 states type_state
//type state machine: on insertion +T, on removal -T
//initial state M: We are a minor piece, and we attack this xsquare
either directly or through another minor.
// p->board[xsquare] was not a minor piece or a queen.
//initial state Q: We are a minor piece xraying a queen, or a queen.
// p->board[xsquare] was not a minor piece or a queen.
//initial state: M-Q: We are a minor piece, and we attack xsquare directly
or through another minor.
// p->board[xsquare] was a queen.
//initial state: 0: We were "cancelled out" by whatever we found at
xsquare. For example, we are a minor
// and xsquare is a minor (no change needed).
//initial state M-P: We are a minor piece, attack square directly or
through another minor, and p->board[xsquare]
// was an xray pawn.
//initial state Q-P: We are a minor xraying a queen, or a queen, and
p->board[xsquare] was an xray pawn.
//state transistion diagram
// M + Q -> Q
// M + * -> M
// Q + * -> Q
// M-Q + Q -> 0
// M-Q + * -> M-Q
// 0 + * -> 0
// M-P + Q -> Q
// M-P + * -> M
// Q-P + * -> Q
//stop state machine: (not really a state machine) handle pawn xray
//state 0: Previous square was not an xray pawn
//state 1: Previous square was not xsquare and an xray pawn
//Insertion xray state machine
//state 0: After removing the piece on xsquare, we will attack
this square directly
//state 1: After removing the piece on xsquare, we will attack
this square indirectly
//transistions:
// 1 + * -> 1
// 0 + XP -> 1
// 0 + * -> 0
//Removal xray state machine
//state 0: After inserting the piece on xsquare, we will attack
this square indirectly
//state 1: After inserting the piece on xsquare, we will not
attack this square at all.
//
//Most of the work here is done before. The only trick is that on xraying a
Pawn, we go from 0-1
//immediately afterward. Otherwise, there is no change.
// 1 + * -> 1
// 0 + ts > 4 -> 1
// 0 + * -> 0
//stop state
nfsm_stop_state = xpawn;
//type state
if(fsm_type_state == 0 && uptype == QUEEN) nfsm_type_state = 1;
else if(fsm_type_state == 2 && uptype == QUEEN) nfsm_type_state = 3;
else if(fsm_type_state == 4 && uptype == QUEEN) nfsm_type_state = 1;
else if(fsm_type_state == 4 && uptype != QUEEN) nfsm_type_state = 0;
else if(fsm_type_state == 5) nfsm_type_state = 1;
else nfsm_type_state =
fsm_type_state;
//insertion xray state
if(fsm_xstate == 0 && xpiece) nfsm_xastate = 1;
else nfsm_xastate = fsm_xstate;
//removal xray state
if(fsm_type_state > 3) nfsm_xrstate = 1;
else nfsm_xrstate = fsm_xstate;
//compress
at_xray_add_decision[i] = (barrier << 7) | (nfsm_type_state << 2) |
(nfsm_stop_state << 1) | (nfsm_xastate);
at_xray_rem_decision[i] = (barrier << 7) | (nfsm_type_state << 2) |
(nfsm_stop_state << 1) | (nfsm_xrstate);
}
//fill gradient vector and ray information
for(i = 0; i < 64; i++)
{
for(j = 0; j < 64; j++) //start by zeroing
out everything
grad_vector[i][j] = 0;
bhp_ray(i) = bhm_ray(i) = bap_ray(i) = bam_ray(i) = (zappa_u64)0;
rfp_ray(i) = rfm_ray(i) = rrp_ray(i) = rrm_ray(i) = (zappa_u64)0;
for(j = i; (j & 7) != 0 && (j >> 3) != 7;) { //if we are not on
the top/left of the board
j += 7; //advance in the
h1->a8 direction (+7)
bhp_ray(i) |= (zappa_u64)1 << j; //add to ray
grad_vector[i][j] = 7; //note our
direction
}
for(j = i; (j & 7) != 7 && (j >> 3) != 0;) { //if we are not on
the bottom/right of the board
j -= 7; //advance in the
a8->h1 direction (-7)
bhm_ray(i) |= (zappa_u64)1 << j; //add to ray
grad_vector[i][j] = -7; //note our
direction
}
for(j = i; (j & 7) != 7 && (j >> 3) != 7;) { //if we are not on
the top/right of the board
j += 9; //advance in the
a1->h8 direction (+9)
bap_ray(i) |= (zappa_u64)1 << j; //add to ray
grad_vector[i][j] = 9; //note our
direction
}
for(j = i; (j & 7) != 0 && (j >> 3) != 0;) { //if we are not on
the bottom/left of the board
j -= 9; //advance in the
h8->a1 direction (-9)
bam_ray(i) |= (zappa_u64)1 << j; //add to ray
grad_vector[i][j] = -9; //note our
direction
}
for(j = i; (j >> 3) != 7;) { //if we are not on
the top of the board
j += 8; //advance up the
file (+8)
rfp_ray(i) |= (zappa_u64)1 << j; //add to ray
grad_vector[i][j] = 8; //note our
direction
}
for(j = i; (j >> 3) != 0;) { //if we are not on
the bottom of the board
j -= 8; //advance down the
file (-8)
rfm_ray(i) |= (zappa_u64)1 << j; //add to ray
grad_vector[i][j] = -8; //note our
direction
}
for(j = i; (j & 7) != 7;) { //if we are not on
the right of the board
j += 1; //advance right on
the rank (+1)
rrp_ray(i) |= (zappa_u64)1 << j; //add to ray
grad_vector[i][j] = 1; //note our
direction
}
for(j = i; (j & 7) != 0;) { //if we are not on
the left of the board
j -= 1; //advance left on
the rank (-1)
rrm_ray(i) |= (zappa_u64)1 << j; //add to ray
grad_vector[i][j] = -1; //note our
direction
}
b_rays(i) = bhp_ray(i) | bhm_ray(i) | bap_ray(i) | bam_ray(i);
r_rays(i) = rrm_ray(i) | rrp_ray(i) | rfm_ray(i) | rfp_ray(i);
}
//rotated bitboard attack generation
for(i = 0; i < 64; i++) //square loop
{
//Knights and Kings are easy
k_precomp[i] = get_precomp_move_mask(d_kx, d_ky, i);
n_precomp[i] = get_precomp_move_mask(d_nx, d_ny, i);
for(j = 0; j < 64; j++) //combination loop
{
//init
rf_precomp[i][j] = rr_precomp[i][j] = ba_precomp[i][j] = bh_precomp[i][j]
= (zappa_u64)0;
//rook file precomputes
for(r = ((i & 56) >> 3) - 1; r >= 0; r--) {
rf_precomp[i][j] |= ((zappa_u64)1 << ((i & 7) + (r << 3)));
if(((1 << r) & (j << 1)) != 0)
break;
}
for(r = ((i & 56) >> 3) + 1 ; r < 8; r++) {
rf_precomp[i][j] |= ((zappa_u64)1 << ((i & 7) + (r << 3)));
if(((1 << r) & (j << 1)) != 0)
break;
}
//rook rank precomputes
for(r = (i & 7) - 1; r >= 0; r--) {
rr_precomp[i][j] |= ((zappa_u64)1 << ((i & 56) + r));
if(((1 << r) & (j << 1)) != 0)
break;
}
for(r = (i & 7) + 1 ; r < 8; r++) {
rr_precomp[i][j] |= ((zappa_u64)1 << ((i & 56) + r));
if(((1 << r) & (j << 1)) != 0)
break;
}
extfill = (j << 1); //our /4 trick
//h1a8 precomputes
//rs_index = index in bit: shift offset - byte offset
hoff = (i + Rank(i) - 7) & 7;
rs_index = inv_roth1a8[i] - hoff*8;
for(k = rs_index; 1;)
{
//1. Check for overflow out of byte
if(k < 0 || k > 7) break;
//break if we are about to go off the edge of the real board
if(Rank(roth1a8[k + hoff*8]) == RANK8 || File(roth1a8[k + hoff*8]) == FILEA)
break;
//Add square to attack list
k++; bh_precomp[i][j] |= (zappa_u64)1 << roth1a8[k + hoff*8];
//break on non-empty square
if((extfill >> k & 1)) break;
}
for(k = rs_index; 1;)
{
//1. Check for overflow out of byte
if(k < 0 || k > 7) break;
//break if we are about to go off the edge of the real board
if(Rank(roth1a8[k + hoff*8]) == RANK1 || File(roth1a8[k + hoff*8]) == FILEH)
break;
//Add square to attack list
k--; bh_precomp[i][j] |= (zappa_u64)1 << roth1a8[k + hoff*8];
//break on non-empty square
if((extfill >> k & 1)) break;
}
aoff = (i - Rank(i)) & 7;
rs_index = inv_rota1h8[i] - aoff*8;
for(k = rs_index; 1;)
{
//1. Check for overflow out of byte
if(k < 0 || k > 7) break;
//break if we are about to go off the edge of the real board
if(Rank(rota1h8[k + aoff*8]) == RANK8 || File(rota1h8[k + aoff*8]) == FILEH)
break;
//Add square to attack list
k++; ba_precomp[i][j] |= (zappa_u64)1 << rota1h8[k + aoff*8];
//break on non-empty square
if((extfill >> k & 1)) break;
}
for(k = rs_index; 1;)
{
//1. Check for overflow out of byte
if(k < 0 || k > 7) break;
//break if we are about to go off the edge of the real board
if(Rank(rota1h8[k + aoff*8]) == RANK1 || File(rota1h8[k + aoff*8]) == FILEA)
break;
//Add square to attack list
k--; ba_precomp[i][j] |= (zappa_u64)1 << rota1h8[k + aoff*8];
//break on non-empty square
if((extfill >> k & 1)) break;
}
}
}
//Fill in at_compress table
for(i = 0; i < 1024; i++)
{
//Break down into attackers of each type
pcomp = pawn_attackers(i);
mcomp = knight_attackers(i) + bishop_attackers(i);
rcomp = rook_attackers(i);
qcomp = queen_attackers(i) + king_attackers(i);
//Spill over into higher buckets && saturate
if(pcomp > 1) { mcomp += pcomp - 1; pcomp = 1; }
if(mcomp > 3) { rcomp += mcomp - 3; mcomp = 3; }
if(rcomp > 1) { qcomp += rcomp - 1; rcomp = 1; }
qcomp = int_min(qcomp, 3);
at_compress_table[i] = pcomp + 4*mcomp + 8*rcomp + 16*qcomp;
}
p_rightmask = 0x7F7F7F7F7F7F7F7FULL; //rightmask contains all the squares
not on the A file
p_leftmask = 0xFEFEFEFEFEFEFEFEULL; //leftmask contains all the squares
not on the H file
p_allmask = 0x7E7E7E7E7E7E7E7EULL; //allmask contains all squares not on
the A or H files
second_rank = 0x000000000000FF00ULL; //All squares on the second rank
seventh_rank = 0x00FF000000000000ULL; //All squares on the seventh rank
}
extern zappa_u8 is_horiz_slider[8];
extern zappa_u8 is_diag_slider[8];
void add_to_attack_table(zpos *p, int ptype, int id, int sq)
{
int i, a;
switch(ptype & 0xF)
{
case BPAWN: add_to_attack_table_bpawn(p, id, sq); break;
case WPAWN: add_to_attack_table_wpawn(p, id, sq); break;
case BKNIGHT:
case WKNIGHT:
case BKING:
case WKING: add_to_attack_table_fixed(p, p->at[ptype & 1], id, ptype, sq);
break;
case BBISHOP:
case BROOK:
case BQUEEN: add_to_attack_table_black_sweep(p, id, ptype, sq); break;
case WBISHOP:
case WROOK:
case WQUEEN: add_to_attack_table_white_sweep(p, id, ptype, sq); break;
}
for(i = 0; i < 2; i++)
{
for(a = (p->at[i][sq+64] >> 16) & p->slideridm[i]; a != 0; a &= a-1) {
id = lead_one_32(a); //shamelessly reusing a variable
remove_from_attack_table_xray(p, p->at[i], id, pl_ptype(p->pl[i][id]), sq,
pl_square(p->pl[i][id]));
}
}
}
//non-incremental version
void add_to_attack_table_noxray(zpos *p, int ptype, int id, int sq)
{
switch(ptype & 0xF)
{
case BPAWN: add_to_attack_table_bpawn(p, id, sq); break;
case WPAWN: add_to_attack_table_wpawn(p, id, sq); break;
case BKNIGHT:
case WKNIGHT:
case BKING:
case WKING: add_to_attack_table_fixed(p, p->at[ptype & 1], id, ptype, sq);
break;
case BBISHOP:
case BROOK:
case BQUEEN: add_to_attack_table_black_sweep(p, id, ptype, sq); break;
case WBISHOP:
case WROOK:
case WQUEEN: add_to_attack_table_white_sweep(p, id, ptype, sq); break;
default: break;
}
}
void remove_from_attack_table(zpos *p, int sq)
{
int i, id, ptype;
unsigned int a;
ptype = p->board[sq];
id = p->pboard[sq];
switch(ptype & 0xF)
{
case BPAWN: remove_from_attack_table_bpawn(p, id, sq); break;
case WPAWN: remove_from_attack_table_wpawn(p, id, sq); break;
case BKNIGHT:
case WKNIGHT:
case BKING:
case WKING: remove_from_attack_table_fixed(p, p->at[ptype & 1], id, ptype,
sq); break;
case BBISHOP:
case BROOK:
case BQUEEN: remove_from_attack_table_black_sweep(p, id, ptype, sq); break;
case WBISHOP:
case WROOK:
case WQUEEN: remove_from_attack_table_white_sweep(p, id, ptype, sq); break;
}
for(i = 0; i < 2; i++)
{
for(a = (p->at[i][sq+64] >> 16) & p->slideridm[i]; a != 0; a &= a-1) {
id = lead_one_32(a); //shamelessly reusing a variable
add_to_attack_table_xray(p, p->at[i], id, pl_ptype(p->pl[i][id]), sq,
pl_square(p->pl[i][id]));
}
}
}
static zforceinline void add_to_attack_table_xray(zpos * RESTRICT p, zappa_u32 *
RESTRICT tbl, int id, int ptype, int sq, int xsq)
{
int dsq, rindex, ip, bc, cs, i;
zappa_u32 mt, xid[2], xtype[6];
zappa_u8 *move_table;
ATTACK_TABLE_PROFILE(at_xray_calls++);
//What type of ray are we?
rindex = compress_gradient[(dsq = grad(xsq, sq)) + 9];
//Check for early exit. For example, if we are a Bishop on B7
//and "xraying" H1, we clearly have no squares to update. 90% of the time not
taken,
//so it shouldn't hurt the branch predictor too much.
if((scan_edge_table[dsq+9] & edge_table[sq]) != 0)
return;
ip = (ptype >= BQUEEN);
//Check to see if we directly attack the square - this means
//we have to scan to find out our type.
if((tbl[sq+64] & (tbl[sq+64] >> 16) & (1 << id)) == 0)
{
//Another early exit case: We attacked through a pawn, e.g. B P p ->
//we don't need to do anything at all.
if((p->board[sq - dsq] >> 1) == PAWN)
return;
//Prescan
for(i = xsq+dsq; i != sq; i += dsq)
ip |= (p->board[i] >= BQUEEN);
ip |= 2; //We know we are xraying something, set xray bit.
}
//OK, we know we are going to have to do some dirty work.
//Initialize conditions
bc = at_xray_add_xsquare[(ip << 6) + (ray_type_table[rindex] << 4) +
p->board[sq]];
cs = *(move_table = xray_slider_move_table[rindex][sq]) & 0x3F;
//initialize type
mt = at_inc(ptype);
xtype[0] = mt; xtype[1] = at_inc(BQUEEN); xtype[2] = mt - at_inc(BQUEEN);
xtype[3] = xtype[4] = xtype[5] = 0;
//initialize ID
xid[0] = (1 << (id + 16)) | (1 << id);
xid[1] = 1 << (id + 16);
//Do the actual update
do {
ATTACK_TABLE_PROFILE(at_xray_squares++);
tbl[cs] += xtype[bc >> 2];
tbl[cs+64] |= xid[bc & 1];
bc = (bc << 6) | ((*move_table & 0xC0) >> 2) | p->board[cs];
bc = at_xray_add_decision[bc];
move_table += 1;
cs = *move_table & 0x3F;
} while((bc & 0x80) == 0);
}
static zforceinline void remove_from_attack_table_xray(zpos * RESTRICT p,
zappa_u32 *RESTRICT tbl, int id, int ptype, int sq, int xsq)
{
int dsq, rindex, ip, bc, cs, i;
zappa_u32 mt, xid[2], xtype[6];
zappa_u8 *move_table;
ATTACK_TABLE_PROFILE(at_xray_calls++);
//What type of ray are we?
rindex = compress_gradient[(dsq = grad(xsq, sq)) + 9];
//Check for early exit. For example, if we are a Bishop on B7
//and "xraying" H1, we clearly have no squares to update. 90% of the time not
taken,
//so it shouldn't hurt the branch predictor too much.
if((scan_edge_table[dsq+9] & edge_table[sq]) != 0)
return;
ip = (ptype >= BQUEEN);
//Check to see if we directly attack the square - this means
//we have to scan to find out our type.
if((tbl[sq+64] & (tbl[sq+64] >> 16) & (1 << id)) == 0)
{
//Another early exit case: We attacked through a pawn, e.g. B P p ->
//we don't need to do anything at all.
if((p->board[sq - dsq] >> 1) == PAWN)
return;
//Prescan
for(i = xsq+dsq; i != sq; i += dsq)
ip |= (p->board[i] >= BQUEEN);
ip |= 2; //We know we are xraying something, set xray bit.
}
//OK, we know we are going to have to do some dirty work.
//Initialize conditions
bc = at_xray_rem_xsquare[(ip << 6) + (ray_type_table[rindex] << 4) +
p->board[sq]];
cs = *(move_table = xray_slider_move_table[rindex][sq]) & 0x3F;
//initialize type
mt = at_inc(ptype);
xtype[0] = mt; xtype[1] = at_inc(BQUEEN); xtype[2] = mt - at_inc(BQUEEN);
xtype[3] = xtype[4] = xtype[5] = 0;
//initialize ID
xid[0] = ~(1 << id); //reset lower bit only
xid[1] = ~((1 << (id + 16)) | (1 << id)); //reset both bits
//Do the actual update
do {
ATTACK_TABLE_PROFILE(at_xray_squares++);
tbl[cs] -= xtype[bc >> 2];
tbl[cs+64] &= xid[bc & 1];
bc = (bc << 6) | ((*move_table & 0xC0) >> 2) | p->board[cs];
bc = at_xray_rem_decision[bc];
move_table += 1;
cs = *move_table & 0x3F;
} while((bc & 0x80) == 0);
}
//
//Functions to add a piece to the attack tables
//
static zforceinline void add_to_attack_table_wpawn(zpos * RESTRICT p, int id,
int sq)
{
int aid = ((1 << 16) + 1) << id;
if(!issquare_tr(sq)) {
p->at[1][sq+9] += at_inc(BPAWN);
p->at[1][sq+9+64] += aid;
}
if(!issquare_tl(sq)) {
p->at[1][sq+7] += at_inc(BPAWN);
p->at[1][sq+7+64] += aid;
}
}
static zforceinline void add_to_attack_table_bpawn(zpos * RESTRICT p, int id,
int sq)
{
int aid = ((1 << 16) + 1) << id;
if(!issquare_bl(sq)) {
p->at[0][sq-9] += at_inc(BPAWN);
p->at[0][sq-9+64] += aid;
}
if(!issquare_br(sq)) {
p->at[0][sq-7] += at_inc(BPAWN);
p->at[0][sq-7+64] += aid;
}
}
static zforceinline void add_to_attack_table_white_sweep(zpos * RESTRICT p, int
id, int ptype, int sq)
{
int target, cs, bc, pb, mt, _id, _xid;
zappa_u32 xtype[5], xid[5];
zappa_s8 * RESTRICT move_table;
ATTACK_TABLE_PROFILE(at_sweep_calls++);
//Select which list of moves to use, initialize cs to first element and bc to
zero.
bc = 0;
cs = *(move_table = mgen_slider_move_table[(ptype >> 1) - BISHOP][sq]);
_id = (1 << id) | (1 << (id + 16));
_xid = 1 << (id + 16);
//Build our table on the fly. This is actually pretty fast: no dependencies,
high IPC,
//and it merges with the prologue anyway. Athlon can do two 32 bit
stores/cycle, P4 only one.
xid[0] = _id; xid[1] = xid[2] = xid[3] =
xid[4] = _xid;
xtype[0] = xtype[1] = xtype[2] = at_inc(ptype); xtype[3] = xtype[4] =
at_inc(BQUEEN);
//1 iteration is 18 cycles on an Athlon XP
do {
ATTACK_TABLE_PROFILE(at_sweep_squares++);
pb = p->board[cs];
mt = *(move_table+32);
p->at[1][cs] += xtype[bc];
p->at[1][cs+64] += xid[bc];
bc = (bc << 6) + (mt & 0x30) + pb;
target = mt & at_scan_skip_decision[bc];
move_table += 1;
bc = at_scan_bc_decision[bc];
move_table += target;
} while((cs = *move_table) >= 0);
}
static zforceinline void add_to_attack_table_black_sweep(zpos * RESTRICT p, int
id, int ptype, int sq)
{
int target, cs, bc, pb, mt, _id, _xid;
zappa_u32 xtype[5], xid[5];
zappa_s8 * RESTRICT move_table;
ATTACK_TABLE_PROFILE(at_sweep_calls++);
//Select which list of moves to use, initialize cs to first element and bc to
zero.
bc = 0;
cs = *(move_table = mgen_slider_move_table[(ptype >> 1) - BISHOP][sq]);
_id = (1 << id) | (1 << (id + 16));
_xid = 1 << (id + 16);
//Build our table on the fly. This is actually pretty fast: no dependencies,
high IPC,
//and it merges with the prologue anyway. Athlon can do two 32 bit
stores/cycle, P4 only one.
xid[0] = _id; xid[1] = xid[2] = xid[3] =
xid[4] = _xid;
xtype[0] = xtype[1] = xtype[2] = at_inc(ptype); xtype[3] = xtype[4] =
at_inc(BQUEEN);
//1 iteration is 18 cycles on an Athlon XP
do {
ATTACK_TABLE_PROFILE(at_sweep_squares++);
pb = p->board[cs];
mt = *(move_table+32);
p->at[0][cs] += xtype[bc];
p->at[0][cs+64] += xid[bc];
bc = (bc << 6) + (mt & 0x30) + pb;
target = mt & at_scan_skip_decision[bc];
move_table += 1;
bc = at_scan_bc_decision[bc];
move_table += target;
} while((cs = *move_table) >= 0);
}
//Sliders are annoying: fixed pieces are much more straightforward
static zforceinline void add_to_attack_table_fixed(zpos * RESTRICT p, zappa_u32
* RESTRICT tbl, int id, int ptype, int sq)
{
int cs;
zappa_u32 xid, type;
zappa_s8 *move_table;
ATTACK_TABLE_PROFILE(at_fixed_calls++);
xid = (1 << id) | (1 << (id + 16));
type = at_inc(ptype);
//initialization
move_table = mgen_fixed_move_table[zt_fixed_index[ptype]][sq];
cs = *move_table;
do {
ATTACK_TABLE_PROFILE(at_fixed_squares++);
tbl[cs] += type;
tbl[cs+64] += xid;
move_table++;
} while((cs = *move_table) >= 0);
}
//
//Functions to remove a piece from the attack tables
//
static zforceinline void remove_from_attack_table_bpawn(zpos * RESTRICT p, int
id, int sq)
{
int xid = ~((1 << (id + 16)) + (1 << id));
if(!issquare_bl(sq)) {
p->at[0][sq-9] -= at_inc(BPAWN);
p->at[0][sq-9+64] &= xid;
}
if(!issquare_br(sq)) {
p->at[0][sq-7] -= at_inc(BPAWN);
p->at[0][sq-7+64] &= xid;
}
}
static zforceinline void remove_from_attack_table_wpawn(zpos * RESTRICT p, int
id, int sq)
{
int xid = ~((1 << (id + 16)) + (1 << id));
if(!issquare_tr(sq)) {
p->at[1][sq+9] -= at_inc(BPAWN);
p->at[1][sq+9+64] &= xid;
}
if(!issquare_tl(sq)) {
p->at[1][sq+7] -= at_inc(BPAWN);
p->at[1][sq+7+64] &= xid;
}
}
static zforceinline void remove_from_attack_table_white_sweep(zpos * RESTRICT p,
int id, int ptype, int sq)
{
int target, cs, bc, pb, mt, _id, _xid;
zappa_u32 xtype[5], xid[5];
zappa_s8 * RESTRICT move_table;
ATTACK_TABLE_PROFILE(at_sweep_calls++);
//Select which list of moves to use, initialize cs to first element and bc to
zero.
bc = 0;
cs = *(move_table = mgen_slider_move_table[(ptype >> 1) - BISHOP][sq]);
_id = (1 << id) | (1 << (id + 16));
_xid = 1 << (id + 16);
//Build our table on the fly. This is actually pretty fast: no dependencies,
high IPC,
//and it merges with the prologue anyway. Athlon can do two 32 bit
stores/cycle, P4 only one.
xid[0] = _id; xid[1] = xid[2] = xid[3] =
xid[4] = _xid;
xtype[0] = xtype[1] = xtype[2] = at_inc(ptype); xtype[3] = xtype[4] =
at_inc(BQUEEN);
//1 iteration is 18 cycles on an Athlon XP
do {
ATTACK_TABLE_PROFILE(at_sweep_squares++);
pb = p->board[cs];
mt = *(move_table+32);
p->at[1][cs] -= xtype[bc];
p->at[1][cs+64] -= xid[bc];
bc = (bc << 6) + (mt & 0x30) + pb;
target = mt & at_scan_skip_decision[bc];
move_table += 1;
bc = at_scan_bc_decision[bc];
move_table += target;
} while((cs = *move_table) >= 0);
}
static zforceinline void remove_from_attack_table_black_sweep(zpos * RESTRICT p,
int id, int ptype, int sq)
{
int target, cs, bc, pb, mt, _id, _xid;
zappa_u32 xtype[5], xid[5];
zappa_s8 * RESTRICT move_table;
ATTACK_TABLE_PROFILE(at_sweep_calls++);
//Select which list of moves to use, initialize cs to first element and bc to
zero.
bc = 0;
cs = *(move_table = mgen_slider_move_table[(ptype >> 1) - BISHOP][sq]);
_id = (1 << id) | (1 << (id + 16));
_xid = 1 << (id + 16);
//Build our table on the fly. This is actually pretty fast: no dependencies,
high IPC,
//and it merges with the prologue anyway. Athlon can do two 32 bit
stores/cycle, P4 only one.
xid[0] = _id; xid[1] = xid[2] = xid[3] =
xid[4] = _xid;
xtype[0] = xtype[1] = xtype[2] = at_inc(ptype); xtype[3] = xtype[4] =
at_inc(BQUEEN);
//1 iteration is 18 cycles on an Athlon XP
do {
ATTACK_TABLE_PROFILE(at_sweep_squares++);
pb = p->board[cs];
mt = *(move_table+32);
p->at[0][cs] -= xtype[bc];
p->at[0][cs+64] -= xid[bc];
bc = (bc << 6) + (mt & 0x30) + pb;
target = mt & at_scan_skip_decision[bc];
move_table += 1;
bc = at_scan_bc_decision[bc];
move_table += target;
} while((cs = *move_table) >= 0);
}
//Sliders are annoying: fixed pieces are much more straightforward
static zforceinline void remove_from_attack_table_fixed(zpos * RESTRICT p,
zappa_u32 * RESTRICT tbl, int id, int ptype, int sq)
{
int cs;
zappa_u32 xid, type;
zappa_s8 *move_table;
ATTACK_TABLE_PROFILE(at_fixed_calls++);
xid = (1 << id) | (1 << (id + 16));
type = at_inc(ptype);
//initialization
move_table = mgen_fixed_move_table[zt_fixed_index[ptype]][sq];
cs = *move_table;
do {
ATTACK_TABLE_PROFILE(at_fixed_squares++);
tbl[cs] -= type;
tbl[cs+64] -= xid;
move_table++;
} while((cs = *move_table) >= 0);
}
void print_attack_table_profile_info(void)
{
#ifdef ATTACK_TABLE_PROFILING
printf("Sweep square average: %f\n", (float)((double)(at_sweep_squares) /
(double)(at_sweep_calls)));
printf("Fixed square average: %f\n", (float)((double)(at_fixed_squares) /
(double)(at_fixed_calls)));
printf("Xray square average: %f\n", (float)((double)(at_xray_squares) /
(double)(at_xray_calls)));
printf("Sweep Calls: %d\n", (int)at_sweep_calls);
printf("Fixed Calls: %d\n", (int)at_fixed_calls);
printf("Xray Calls: %d\n", (int)at_xray_calls);
#endif
}
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.