Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: The Zappa Attack Table Code

Author: Vasik Rajlich

Date: 04:17:51 05/06/04

Go up one level in this thread


On May 05, 2004 at 17:14:20, Anthony Cozzie wrote:

>I experimented with attack tables on the recommendation of Vincent last fall.
>The following code is based on his move generator.  I eventually decided that on
>32 bit machines, attack tables were a clear win over bitboards, but at 64 bits
>it would be about a break-even deal, and since I already had a bitboard engine,
>the choice was easy ;)
>
>anthony
>

I'm curious, why do you think bitboards are break-even at 64 bits, but a loss at
32?

I have noticed that Rybka's NPS relative to Fritz varies greatly based on the
architecture.

Aside from 32 vs 64 bits, L2 cache is probably very important. Rybka's
precomputed bitboard masks take just under 200 K (this # should be more or less
the same for any rotated bitboard engine), but I don't know how this behaves in
the cache.

Vas

>---------------------------------
>#include "zappa.h"
>
>//Since Zappa spends a lot of time updating the attack tables,
>//I try to keep track of exactly what its doing.
>
>//#define ATTACK_TABLE_PROFILING
>
>#ifdef ATTACK_TABLE_PROFILING
>#define ATTACK_TABLE_PROFILE(A) A
>zappa_u64 at_sweep_calls     = 0;
>zappa_u64 at_fixed_calls     = 0;
>zappa_u64 at_xray_calls      = 0;
>zappa_u64 at_sweep_squares   = 0;
>zappa_u64 at_fixed_squares   = 0;
>zappa_u64 at_xray_squares    = 0;
>#else
>#define ATTACK_TABLE_PROFILE(A)
>#endif
>
>#define SCAN_FSM_STATE_NOX   0
>#define SCAN_FSM_STATE_MX    1
>#define SCAN_FSM_STATE_PX    2
>#define SCAN_FSM_STATE_QX    3
>#define SCAN_FSM_STATE_QPX   4
>
>//Add a piece to the attack tables
>static zforceinline void add_to_attack_table_wpawn(zpos * RESTRICT p, int id,
>int sq);
>static zforceinline void add_to_attack_table_bpawn(zpos * RESTRICT p, int id,
>int sq);
>static zforceinline void add_to_attack_table_white_sweep(zpos * RESTRICT p, int
>id, int ptype, int sq);
>static zforceinline void add_to_attack_table_black_sweep(zpos * RESTRICT p, int
>id, int ptype, int sq);
>static zforceinline void add_to_attack_table_fixed(zpos * RESTRICT p, zappa_u32
>* RESTRICT tbl, int id, int ptype, int sq);
>
>//Remove a piece from the attack tables
>static zforceinline void remove_from_attack_table_wpawn(zpos * RESTRICT p, int
>id, int sq);
>static zforceinline void remove_from_attack_table_bpawn(zpos * RESTRICT p, int
>id, int sq);
>static zforceinline void remove_from_attack_table_white_sweep(zpos * RESTRICT p,
>int id, int ptype, int sq);
>static zforceinline void remove_from_attack_table_black_sweep(zpos * RESTRICT p,
>int id, int ptype, int sq);
>static zforceinline void remove_from_attack_table_fixed(zpos * RESTRICT p,
>zappa_u32 * RESTRICT tbl, int id, int ptype, int sq);
>
>//Update xray attacks - the really annoying part, in terms of complexity
>static zforceinline void add_to_attack_table_xray(zpos * RESTRICT p, zappa_u32 *
>RESTRICT tbl, int id, int ptype, int sq, int xsq);
>static zforceinline void remove_from_attack_table_xray(zpos * RESTRICT p,
>zappa_u32 * RESTRICT tbl, int id, int ptype, int sq, int xsq);
>
>
>
>
>const zappa_s8 rot90[64] = {0, 8,16,24,32,40,48,56,
>			    1, 9,17,25,33,41,49,57,
>			    2,10,18,26,34,42,50,58,
>			    3,11,19,27,35,43,51,59,
>			    4,12,20,28,36,44,52,60,
>			    5,13,21,29,37,45,53,61,
>			    6,14,22,30,38,46,54,62,
>			    7,15,23,31,39,47,55,63};
>
>//Square A in rot-space is at square h1a8rot[A] in flat-space
>const zappa_s8 roth1a8[64] = {H1, G2, F3, E4, D5, C6, B7, A8,
>			      H2, G3, F4, E5, D6, C7, B8, A1,
>			      H3, G4, F5, E6, D7, C8, B1, A2,
>			      H4, G5, F6, E7, D8, C1, B2, A3,
>			      H5, G6, F7, E8, D1, C2, B3, A4,
>			      H6, G7, F8, E1, D2, C3, B4, A5,
>			      H7, G8, F1, E2, D3, C4, B5, A6,
>			      H8, G1, F2, E3, D4, C5, B6, A7};
>
>//Square A in flatspace is at square roth1a8[A] in rot-space
>const zappa_s8 inv_roth1a8[64] = {H2, G3, F4, E5, D6, C7, B8, A1,
>				  H3, G4, F5, E6, D7, C8, B1, A2,
>				  H4, G5, F6, E7, D8, C1, B2, A3,
>				  H5, G6, F7, E8, D1, C2, B3, A4,
>				  H6, G7, F8, E1, D2, C3, B4, A5,
>				  H7, G8, F1, E2, D3, C4, B5, A6,
>				  H8, G1, F2, E3, D4, C5, B6, A7,
>				  H1, G2, F3, E4, D5, C6, B7, A8};
>
>//Square A in rot-space is at square a1h8rot[A] in flat-space
>const zappa_s8 rota1h8[64] = {A1, B2, C3, D4, E5, F6, G7, H8,
>			      B1, C2, D3, E4, F5, G6, H7, A8,
>			      C1, D2, E3, F4, G5, H6, A7, B8,
>			      D1, E2, F3, G4, H5, A6, B7, C8,
>			      E1, F2, G3, H4, A5, B6, C7, D8,
>			      F1, G2, H3, A4, B5, C6, D7, E8,
>			      G1, H2, A3, B4, C5, D6, E7, F8,
>			      H1, A2, B3, C4, D5, E6, F7, G8};
>
>//Square A in flatspace is at square roth1a8[A] in rot-space
>const zappa_s8 inv_rota1h8[64] = {A1, A2, A3, A4, A5, A6, A7, A8,
>				  B8, B1, B2, B3, B4, B5, B6, B7,
>				  C7, C8, C1, C2, C3, C4, C5, C6,
>				  D6, D7, D8, D1, D2, D3, D4, D5,
>				  E5, E6, E7, E8, E1, E2, E3, E4,
>				  F4, F5, F6, F7, F8, F1, F2, F3,
>				  G3, G4, G5, G6, G7, G8, G1, G2,
>				  H2, H3, H4, H5, H6, H7, H8, H1};
>
>//Precomputed move masks for rotated bitboards
>bitboard rr_precomp[64][64];    //rook ranks:              8*64*64 = 32kB memory
>bitboard rf_precomp[64][64];    //rook files:              8*64*64 = 32kB memory
>bitboard ba_precomp[64][64];    //bishop a1->h8 diagonals: 8*64*64 = 32kB memory
>bitboard bh_precomp[64][64];    //bishop h1->a8 diagonals: 8*64*64 = 32kB memory
>
>//Precomputed move masks for kings, pawns, and knights
>bitboard k_precomp[64];          //king moves:                  8*64 = 512B
>memory
>bitboard n_precomp[64];          //knight moves:                8*64 = 512B
>memory
>bitboard p_rightmask;            //rightmask contains all the squares not on the
>A file
>bitboard p_leftmask;             //leftmask contains all the squares not on the
>H file
>bitboard p_allmask;
>bitboard second_rank;            //Squares on the second rank
>bitboard seventh_rank;           //Squares on the seventh rank
>
>//given an array of 'delta x squares' and 'delta y squares' make a bitboard
>//with ones on those squares and zeroes everywhere else
>//also do not allow 'board wraps'
>//invariant: r is of size 8
>//called by: fill_king_precomputes(), fill_knight_precomputes()
>static zforceinline bitboard get_precomp_move_mask(const int d_x[], const int
>d_y[], int sq)
>{
>  register bitboard v, bb_one;
>  register int i, x, y;
>
>  for(i = 0, v = 0, bb_one = 1; i < 8; i++)
>  {
>    x = (sq & 7) + d_x[i];
>    y = ((sq & 56) >> 3) + d_y[i];
>
>    if((x >= 0) & (x <= 7) & (y >= 0) & (y <= 7))
>      v |= bb_one << (sq + d_x[i] + 8 * d_y[i]);
>  }
>
>  return v;
>}
>
>
>//initialize lookup tables
>void init_move_generation(void)
>{
>  //constants for knight/king attack bitboards
>  const static int d_nx[8] = {-2,-1, 1, 2, 2, 1,-1,-2}, d_ny[8] = {-1,-2,-2,-1,
>1, 2, 2, 1};
>  const static int d_kx[8] = {-1, 0, 1, 1, 1, 0,-1,-1}, d_ky[8] = {-1,-1,-1, 0,
>1, 1, 1, 0};
>  int i, j, r, k, rs_index, extfill, hoff, aoff, pcomp, mcomp, rcomp, qcomp,
>ptype, uptype, fsm_state, fsm_xstate;
>  int fsm_stop_state, fsm_type_state, xpawn, ray_xtype;
>  int barrier, xpiece, nfsm_xastate, nfsm_xrstate;
>  int nfsm_type_state, nfsm_stop_state, xminor, g;
>
>  int mgen_iter, sgen_iter, sq, x, y;                                //iterators
>  const int directions[3][9] = {{7, -7, 9, -9, 0},  //slider delta types (rooks
>can move n*1, etc)
>			        {1, -1, 8, -8, 0},
>				{1, -1, 8, -8, 7, -7, 9, -9, 0}};
>  const int dx[2][8] = {{2, 1, -1, -2, -2, -1, 1, 2}, {1, 1, 0, -1, -1, -1, 0,
>1}};
>  const int dy[2][8] = {{1, 2, 2, 1, -1, -2, -2, -1}, {0, 1, 1, 1, 0, -1, -1,
>-1}};
>
>  //Initialize edge table
>  for(i = 0; i < 64; i++)
>    edge_table[i] = (((i & 7) == 0) << 3) + (((i & 7) == 7) << 2) + (((i >> 3)
>== 7) << 1) +(((i >> 3) == 0) << 0);
>
>  //xray piece lists
>  for(i = 0; i < 8; i++)
>  {
>    g = uncompress_gradient[i];
>    for(j = 0; j < 64; j++)
>    {
>      k = 0;
>      for(sq = j; (edge_table[sq] & scan_edge_table[g+9]) == 0; sq += g)
>	xray_slider_move_table[i][j][k++] = (ray_type_table[i] << 6) + sq + g;
>      xray_slider_move_table[i][j][int_max(k-1, 0)] |= 0xC0; //EOR
>    }
>  }
>
>  //sliders
>  for(i = 0; i < 3; i++)                     //loop over piece types
>  {
>    for(j = 0; j < 64; j++)                  //loop over initial squares
>    {
>      for(k = 0; k < 29; k++)                //initialize (-1 -> end of list)
>	mgen_slider_move_table[i][j][k] = -1;
>
>      sgen_iter = mgen_iter = 0;
>      for(k = 0; directions[i][k] != 0; k++) //loop over ray
>      {
>	for(sq = j; 1;)
>	{
>	  //edge test: top
>	  if((sq >> 3) == 7 && directions[i][k] > 1)
>	    break;
>
>	  //edge test: bottom
>	  if((sq >> 3) == 0 && directions[i][k] < -1)
>	    break;
>
>	  //edge test: right
>	  if((sq & 7) == 7 && (directions[i][k] == -7 || directions[i][k] == 9 ||
>directions[i][k] == 1))
>	    break;
>
>	  //edge test: left
>	  if((sq & 7) == 0 && (directions[i][k] == 7 || directions[i][k] == -9 ||
>directions[i][k] == -1))
>	    break;
>
>	  sq += directions[i][k];                  //proceed along the ray
>	  mgen_slider_move_table[i][j][mgen_iter++] = sq; //add to our list of squares
>	}
>
>	//0: xray White Pawns, diagonal sliders
>	//1: xray Black Pawns, diagonal sliders
>	//2: xray horizontal sliders
>	//3: End of ray.
>	if(abs(directions[i][k]) % 2 == 1 && abs(directions[i][k]) > 1)
>	  ray_xtype = directions[i][k] > 0;
>	else
>	  ray_xtype = 2;
>
>	//for all the square so far: if we terminate early, go to the next ray
>	for(; sgen_iter < mgen_iter; sgen_iter++)
>	  mgen_slider_move_table[i][j][sgen_iter+32] = (((mgen_iter - sgen_iter - 1 ==
>0) ? 3 : ray_xtype) << 4) | (mgen_iter - sgen_iter - 1);
>      }
>    }
>  }
>
>  //fixed pieces
>  for(i = 0; i < 2; i++)
>  {
>    for(j = 0; j < 64; j++)
>    {
>      for(k = 0; k < 16; k++)
>	mgen_fixed_move_table[i][j][k] = -1;
>
>      for(mgen_iter = k = 0; k < 8; k++)
>      {
>	x = File(j) + dx[i][k];
>	y = Rank(j) + dy[i][k];
>
>	if(x >= 0 && x <= 7 && y >= 0 && y <= 7)
>	  mgen_fixed_move_table[i][j][mgen_iter++] = (y << 3) | x;
>      }
>    }
>  }
>
>  //precompute the castling move for easy comparison / concise code
>  wk_castle_move = m_build(0, 0, WKING, E1, G1);
>  wq_castle_move = m_build(0, 0, WKING, E1, C1);
>  bk_castle_move = m_build(0, 0, BKING, E8, G8);
>  bq_castle_move = m_build(0, 0, BKING, E8, C8);
>
>  //non xray state machine transistions and output
>  for(i = 0; i < 320; i++)
>  {
>    uptype     = (ptype = i & 0xF) >> 1;
>    ray_xtype  = (i >> 4) & 0x3;
>    fsm_state  =  i >> 6;
>
>    //are we done with this ray?
>    switch(ray_xtype)
>    {
>    case 0: barrier = (ptype == WPAWN) || (uptype == KNIGHT) || (uptype == KING)
>|| (uptype == ROOK);   break;
>    case 1: barrier = (ptype == BPAWN) || (uptype == KNIGHT) || (uptype == KING)
>|| (uptype == ROOK);   break;
>    case 2: barrier = (uptype == PAWN) || (uptype == KNIGHT) || (uptype == KING)
>|| (uptype == BISHOP); break;
>    case 3: barrier = 1; break;
>    }
>    barrier |= (fsm_state == SCAN_FSM_STATE_PX) || (fsm_state ==
>SCAN_FSM_STATE_QPX);
>
>    //skip decision
>    if(barrier)
>at_scan_skip_decision[i] = 0x0F;   //skip
>    else
>at_scan_skip_decision[i] = 0x00;   //continue
>
>    //bc decision
>    if(barrier)
>at_scan_bc_decision[i] = SCAN_FSM_STATE_NOX;
>    else if(uptype == QUEEN)
>at_scan_bc_decision[i] = SCAN_FSM_STATE_QX;
>    else if(uptype == PAWN && fsm_state <  SCAN_FSM_STATE_QX)
>at_scan_bc_decision[i] = SCAN_FSM_STATE_PX;
>    else if(uptype == PAWN && fsm_state >= SCAN_FSM_STATE_QX)
>at_scan_bc_decision[i] = SCAN_FSM_STATE_QPX;
>    else if(ptype > 0      && fsm_state <  SCAN_FSM_STATE_QX)
>at_scan_bc_decision[i] = SCAN_FSM_STATE_MX;
>    else
>at_scan_bc_decision[i] = fsm_state;
>  }
>
>  //xray xsquare state machine transistion
>  for(i = 0; i < 256; i++)
>  {
>    //uncompress
>    uptype         = (ptype = i & 0xF) >> 1;
>    ray_xtype      = (i >> 4) & 0x3;
>    fsm_type_state = (i >> 6) & 0x1;  //M/Q
>    fsm_xstate     = (i >> 7) & 0x1;
>
>    xminor = (ray_xtype < 2 && uptype == BISHOP) || (ray_xtype >= 2 && uptype ==
>ROOK);
>    xpawn  = (ray_xtype == 0 && ptype == BPAWN) || (ray_xtype == 1 && ptype ==
>WPAWN);
>
>    //First transistion: is the piece we are adding/subtracting a slider?
>    if(xminor)                                        nfsm_type_state = 3;
>          //0
>    else if(fsm_type_state == 0 && uptype == QUEEN)   nfsm_type_state = 2;
>          //M-Q
>    else if(fsm_type_state == 1 && uptype == QUEEN)   nfsm_type_state = 3;
>          //0
>    else if(fsm_type_state == 0 && xpawn)             nfsm_type_state = 4;
>          //M-P
>    else if(fsm_type_state == 1 && xpawn)             nfsm_type_state = 5;
>          //Q-P
>    else                                              nfsm_type_state =
>fsm_type_state;    //M/Q
>
>    //stop state is always 0
>    nfsm_stop_state = 0;
>
>    //xstate: insertion is simply "do we xray", without caring what is on the
>square
>    nfsm_xastate = fsm_xstate;
>
>    //Removal: we do count the piece on the xsquare
>    nfsm_xrstate = !xpawn && !xminor && uptype != QUEEN;
>
>
>
>    //build and store
>    at_xray_add_xsquare[i] = (nfsm_type_state << 2) | (nfsm_stop_state << 1) |
>(nfsm_xastate);
>    at_xray_rem_xsquare[i] = (nfsm_type_state << 2) | (nfsm_stop_state << 1) |
>(nfsm_xrstate);
>  }
>
>  //xray state machine transistions
>  for(i = 0; i < 1536; i++)
>  {
>    //uncompress
>    uptype         = (ptype = i & 0xF) >> 1;
>    ray_xtype      = (i >> 4) & 0x3;
>    fsm_xstate     = (i >> 6) & 0x1;
>    fsm_stop_state = (i >> 7) & 0x1;
>    fsm_type_state = (i >> 8);
>
>    //translate info into things that actually cause state changes
>
>    //do we stop here?
>    switch(ray_xtype)
>    {
>    case 0: barrier = (ptype == WPAWN) || (uptype == KNIGHT) || (uptype == KING)
>|| (uptype == ROOK);   break;
>    case 1: barrier = (ptype == BPAWN) || (uptype == KNIGHT) || (uptype == KING)
>|| (uptype == ROOK);   break;
>    case 2: barrier = (uptype == PAWN) || (uptype == KNIGHT) || (uptype == KING)
>|| (uptype == BISHOP); break;
>    case 3: barrier = 1; break;
>    }
>
>    barrier |= fsm_stop_state == 1; //also stop if we saw a pawn previously.
>
>    //xray pawn?
>    switch(ray_xtype)
>    {
>    case 0: xpawn = (ptype == BPAWN); break;
>    case 1: xpawn = (ptype == WPAWN); break;
>    case 2:
>    case 3: xpawn = 0; break;
>    }
>
>    //will we scan through this piece
>    xpiece          = ptype != EMPTY && !barrier;
>
>    //Input:     4 bits ptype, 2 bits decision.
>    //FSM State: 1 bit xray, 1 bit stop_next, 6 states type_state
>
>    //type state machine: on insertion +T, on removal -T
>    //initial state    M: We are a minor piece, and we attack this xsquare
>either directly or through another minor.
>    //                    p->board[xsquare] was not a minor piece or a queen.
>    //initial state    Q: We are a minor piece xraying a queen, or a queen.
>    //                    p->board[xsquare] was not a minor piece or a queen.
>    //initial state: M-Q: We are a minor piece, and we attack xsquare directly
>or through another minor.
>    //                    p->board[xsquare] was a queen.
>    //initial state:   0: We were "cancelled out" by whatever we found at
>xsquare.  For example, we are a minor
>    //                    and xsquare is a minor (no change needed).
>    //initial state  M-P: We are a minor piece, attack square directly or
>through another minor, and p->board[xsquare]
>    //                    was an xray pawn.
>    //initial state  Q-P: We are a minor xraying a queen, or a queen, and
>p->board[xsquare] was an xray pawn.
>
>    //state transistion diagram
>    //   M   + Q -> Q
>    //   M   + * -> M
>    //   Q   + * -> Q
>    //   M-Q + Q -> 0
>    //   M-Q + * -> M-Q
>    //   0   + * -> 0
>    //   M-P + Q -> Q
>    //   M-P + * -> M
>    //   Q-P + * -> Q
>
>    //stop state machine: (not really a state machine) handle pawn xray
>    //state           0:  Previous square was not an xray pawn
>    //state           1:  Previous square was not xsquare and an xray pawn
>
>    //Insertion xray state machine
>    //state           0:  After removing the piece on xsquare, we will attack
>this square directly
>    //state           1:  After removing the piece on xsquare, we will attack
>this square indirectly
>    //transistions:
>    //   1 + *        -> 1
>    //   0 + XP       -> 1
>    //   0 + *        -> 0
>
>    //Removal xray state machine
>    //state           0:  After inserting the piece on xsquare, we will attack
>this square indirectly
>    //state           1:  After inserting the piece on xsquare, we will not
>attack this square at all.
>    //
>    //Most of the work here is done before.  The only trick is that on xraying a
>Pawn, we go from 0-1
>    //immediately afterward.  Otherwise, there is no change.
>    //   1 + *         -> 1
>    //   0 + ts > 4    -> 1
>    //   0 + *         -> 0
>
>    //stop state
>    nfsm_stop_state = xpawn;
>
>    //type state
>    if(fsm_type_state == 0 && uptype == QUEEN)       nfsm_type_state = 1;
>    else if(fsm_type_state == 2 && uptype == QUEEN)  nfsm_type_state = 3;
>    else if(fsm_type_state == 4 && uptype == QUEEN)  nfsm_type_state = 1;
>    else if(fsm_type_state == 4 && uptype != QUEEN)  nfsm_type_state = 0;
>    else if(fsm_type_state == 5)                     nfsm_type_state = 1;
>    else                                             nfsm_type_state =
>fsm_type_state;
>
>    //insertion xray state
>    if(fsm_xstate == 0 && xpiece)                    nfsm_xastate = 1;
>    else                                             nfsm_xastate = fsm_xstate;
>
>    //removal xray state
>    if(fsm_type_state > 3)                           nfsm_xrstate = 1;
>    else                                             nfsm_xrstate = fsm_xstate;
>
>
>    //compress
>    at_xray_add_decision[i] = (barrier << 7) | (nfsm_type_state << 2) |
>(nfsm_stop_state << 1) | (nfsm_xastate);
>    at_xray_rem_decision[i] = (barrier << 7) | (nfsm_type_state << 2) |
>(nfsm_stop_state << 1) | (nfsm_xrstate);
>  }
>
>
>  //fill gradient vector and ray information
>  for(i = 0; i < 64; i++)
>  {
>    for(j = 0; j < 64; j++)                                   //start by zeroing
>out everything
>      grad_vector[i][j] = 0;
>
>    bhp_ray(i) = bhm_ray(i) = bap_ray(i) = bam_ray(i) = (zappa_u64)0;
>    rfp_ray(i) = rfm_ray(i) = rrp_ray(i) = rrm_ray(i) = (zappa_u64)0;
>
>    for(j = i; (j & 7) != 0 && (j >> 3) != 7;) {              //if we are not on
>the top/left of the board
>      j += 7;                                                 //advance in the
>h1->a8 direction (+7)
>      bhp_ray(i) |= (zappa_u64)1 << j;                        //add to ray
>      grad_vector[i][j] = 7;                                  //note our
>direction
>    }
>
>    for(j = i; (j & 7) != 7 && (j >> 3) != 0;) {              //if we are not on
>the bottom/right of the board
>      j -= 7;                                                 //advance in the
>a8->h1 direction (-7)
>      bhm_ray(i) |= (zappa_u64)1 << j;                        //add to ray
>      grad_vector[i][j] = -7;                                 //note our
>direction
>    }
>
>    for(j = i; (j & 7) != 7 && (j >> 3) != 7;) {              //if we are not on
>the top/right of the board
>      j += 9;                                                 //advance in the
>a1->h8 direction (+9)
>      bap_ray(i) |= (zappa_u64)1 << j;                        //add to ray
>      grad_vector[i][j] = 9;                                  //note our
>direction
>    }
>
>    for(j = i; (j & 7) != 0 && (j >> 3) != 0;) {              //if we are not on
>the bottom/left of the board
>      j -= 9;                                                 //advance in the
>h8->a1 direction (-9)
>      bam_ray(i) |= (zappa_u64)1 << j;                        //add to ray
>      grad_vector[i][j] = -9;                                 //note our
>direction
>    }
>
>    for(j = i; (j >> 3) != 7;) {                              //if we are not on
>the top of the board
>      j += 8;                                                 //advance up the
>file (+8)
>      rfp_ray(i) |= (zappa_u64)1 << j;                        //add to ray
>      grad_vector[i][j] = 8;                                  //note our
>direction
>    }
>
>    for(j = i; (j >> 3) != 0;) {                              //if we are not on
>the bottom of the board
>      j -= 8;                                                 //advance down the
>file (-8)
>      rfm_ray(i) |= (zappa_u64)1 << j;                        //add to ray
>      grad_vector[i][j] = -8;                                 //note our
>direction
>    }
>
>    for(j = i; (j & 7) != 7;) {                               //if we are not on
>the right of the board
>      j += 1;                                                 //advance right on
>the rank (+1)
>      rrp_ray(i) |= (zappa_u64)1 << j;                        //add to ray
>      grad_vector[i][j] = 1;                                  //note our
>direction
>    }
>
>    for(j = i; (j & 7) != 0;) {                               //if we are not on
>the left of the board
>      j -= 1;                                                 //advance left on
>the rank (-1)
>      rrm_ray(i) |= (zappa_u64)1 << j;                        //add to ray
>      grad_vector[i][j] = -1;                                 //note our
>direction
>    }
>
>    b_rays(i) = bhp_ray(i) | bhm_ray(i) | bap_ray(i) | bam_ray(i);
>    r_rays(i) = rrm_ray(i) | rrp_ray(i) | rfm_ray(i) | rfp_ray(i);
>  }
>
>
>  //rotated bitboard attack generation
>  for(i = 0; i < 64; i++) //square loop
>  {
>    //Knights and Kings are easy
>    k_precomp[i] = get_precomp_move_mask(d_kx, d_ky, i);
>    n_precomp[i] = get_precomp_move_mask(d_nx, d_ny, i);
>
>    for(j = 0; j < 64; j++) //combination loop
>    {
>      //init
>      rf_precomp[i][j] = rr_precomp[i][j] = ba_precomp[i][j] = bh_precomp[i][j]
>= (zappa_u64)0;
>
>      //rook file precomputes
>      for(r = ((i & 56) >> 3) - 1; r >= 0; r--) {
>	rf_precomp[i][j] |= ((zappa_u64)1 << ((i & 7) + (r << 3)));
>	if(((1 << r) & (j << 1)) != 0)
>	  break;
>      }
>      for(r = ((i & 56) >> 3) + 1 ; r < 8; r++) {
>	rf_precomp[i][j] |= ((zappa_u64)1 << ((i & 7) + (r << 3)));
>	if(((1 << r) & (j << 1)) != 0)
>	  break;
>      }
>
>      //rook rank precomputes
>      for(r = (i & 7) - 1; r >= 0; r--) {
>	rr_precomp[i][j] |= ((zappa_u64)1 << ((i & 56) + r));
>	if(((1 << r) & (j << 1)) != 0)
>	  break;
>      }
>      for(r = (i & 7) + 1 ; r < 8; r++) {
>	rr_precomp[i][j] |= ((zappa_u64)1 << ((i & 56) + r));
>	if(((1 << r) & (j << 1)) != 0)
>	  break;
>      }
>
>      extfill = (j << 1); //our /4 trick
>
>      //h1a8 precomputes
>      //rs_index = index in bit: shift offset - byte offset
>      hoff = (i + Rank(i) - 7) & 7;
>      rs_index = inv_roth1a8[i] - hoff*8;
>      for(k = rs_index; 1;)
>      {
>	//1. Check for overflow out of byte
>	if(k < 0 || k > 7) break;
>
>	//break if we are about to go off the edge of the real board
>	if(Rank(roth1a8[k + hoff*8]) == RANK8 || File(roth1a8[k + hoff*8]) == FILEA)
>break;
>
>	//Add square to attack list
>	k++; bh_precomp[i][j] |= (zappa_u64)1 << roth1a8[k + hoff*8];
>
>	//break on non-empty square
>	if((extfill >> k & 1)) break;
>      }
>      for(k = rs_index; 1;)
>      {
>	//1. Check for overflow out of byte
>	if(k < 0 || k > 7) break;
>
>	//break if we are about to go off the edge of the real board
>	if(Rank(roth1a8[k + hoff*8]) == RANK1 || File(roth1a8[k + hoff*8]) == FILEH)
>break;
>
>	//Add square to attack list
>	k--; bh_precomp[i][j] |= (zappa_u64)1 << roth1a8[k + hoff*8];
>
>	//break on non-empty square
>	if((extfill >> k & 1)) break;
>      }
>
>      aoff = (i - Rank(i)) & 7;
>      rs_index = inv_rota1h8[i] - aoff*8;
>      for(k = rs_index; 1;)
>      {
>	//1. Check for overflow out of byte
>	if(k < 0 || k > 7) break;
>
>	//break if we are about to go off the edge of the real board
>	if(Rank(rota1h8[k + aoff*8]) == RANK8 || File(rota1h8[k + aoff*8]) == FILEH)
>break;
>
>	//Add square to attack list
>	k++; ba_precomp[i][j] |= (zappa_u64)1 << rota1h8[k + aoff*8];
>
>	//break on non-empty square
>	if((extfill >> k & 1)) break;
>      }
>      for(k = rs_index; 1;)
>      {
>	//1. Check for overflow out of byte
>	if(k < 0 || k > 7) break;
>
>	//break if we are about to go off the edge of the real board
>	if(Rank(rota1h8[k + aoff*8]) == RANK1 || File(rota1h8[k + aoff*8]) == FILEA)
>break;
>
>	//Add square to attack list
>	k--; ba_precomp[i][j] |= (zappa_u64)1 << rota1h8[k + aoff*8];
>
>	//break on non-empty square
>	if((extfill >> k & 1)) break;
>      }
>    }
>  }
>
>  //Fill in at_compress table
>  for(i = 0; i < 1024; i++)
>  {
>    //Break down into attackers of each type
>    pcomp = pawn_attackers(i);
>    mcomp = knight_attackers(i) + bishop_attackers(i);
>    rcomp = rook_attackers(i);
>    qcomp = queen_attackers(i) + king_attackers(i);
>
>    //Spill over into higher buckets && saturate
>    if(pcomp > 1) { mcomp += pcomp - 1;  pcomp = 1; }
>    if(mcomp > 3) { rcomp += mcomp - 3;  mcomp = 3; }
>    if(rcomp > 1) { qcomp += rcomp - 1;  rcomp = 1; }
>    qcomp = int_min(qcomp, 3);
>
>    at_compress_table[i] = pcomp + 4*mcomp + 8*rcomp + 16*qcomp;
>  }
>
>  p_rightmask  = 0x7F7F7F7F7F7F7F7FULL;   //rightmask contains all the squares
>not on the A file
>  p_leftmask   = 0xFEFEFEFEFEFEFEFEULL;   //leftmask contains all the squares
>not on the H file
>  p_allmask    = 0x7E7E7E7E7E7E7E7EULL;   //allmask contains all squares not on
>the A or H files
>  second_rank  = 0x000000000000FF00ULL;   //All squares on the second rank
>  seventh_rank = 0x00FF000000000000ULL;   //All squares on the seventh rank
>}
>
>extern zappa_u8 is_horiz_slider[8];
>extern zappa_u8 is_diag_slider[8];
>
>void add_to_attack_table(zpos *p, int ptype, int id, int sq)
>{
>  int i, a;
>
>  switch(ptype & 0xF)
>  {
>  case BPAWN:    add_to_attack_table_bpawn(p, id, sq); break;
>  case WPAWN:    add_to_attack_table_wpawn(p, id, sq); break;
>  case BKNIGHT:
>  case WKNIGHT:
>  case BKING:
>  case WKING:    add_to_attack_table_fixed(p, p->at[ptype & 1], id, ptype, sq);
>break;
>  case BBISHOP:
>  case BROOK:
>  case BQUEEN:   add_to_attack_table_black_sweep(p, id, ptype, sq); break;
>  case WBISHOP:
>  case WROOK:
>  case WQUEEN:   add_to_attack_table_white_sweep(p, id, ptype, sq); break;
>  }
>
>  for(i = 0; i < 2; i++)
>  {
>    for(a = (p->at[i][sq+64] >> 16) & p->slideridm[i]; a != 0; a &= a-1) {
>      id = lead_one_32(a);  //shamelessly reusing a variable
>     	remove_from_attack_table_xray(p, p->at[i], id, pl_ptype(p->pl[i][id]), sq,
>pl_square(p->pl[i][id]));
>    }
>  }
>}
>
>//non-incremental version
>void add_to_attack_table_noxray(zpos *p, int ptype, int id, int sq)
>{
>  switch(ptype & 0xF)
>  {
>  case BPAWN:    add_to_attack_table_bpawn(p, id, sq); break;
>  case WPAWN:    add_to_attack_table_wpawn(p, id, sq); break;
>  case BKNIGHT:
>  case WKNIGHT:
>  case BKING:
>  case WKING:    add_to_attack_table_fixed(p, p->at[ptype & 1], id, ptype, sq);
>break;
>  case BBISHOP:
>  case BROOK:
>  case BQUEEN:   add_to_attack_table_black_sweep(p, id, ptype, sq); break;
>  case WBISHOP:
>  case WROOK:
>  case WQUEEN:   add_to_attack_table_white_sweep(p, id, ptype, sq); break;
>  default:       break;
>  }
>}
>
>void remove_from_attack_table(zpos *p, int sq)
>{
>  int i, id, ptype;
>  unsigned int a;
>
>  ptype = p->board[sq];
>  id = p->pboard[sq];
>
>  switch(ptype & 0xF)
>  {
>  case BPAWN:    remove_from_attack_table_bpawn(p, id, sq); break;
>  case WPAWN:    remove_from_attack_table_wpawn(p, id, sq); break;
>  case BKNIGHT:
>  case WKNIGHT:
>  case BKING:
>  case WKING:    remove_from_attack_table_fixed(p, p->at[ptype & 1], id, ptype,
>sq);  break;
>  case BBISHOP:
>  case BROOK:
>  case BQUEEN:   remove_from_attack_table_black_sweep(p, id, ptype, sq); break;
>  case WBISHOP:
>  case WROOK:
>  case WQUEEN:   remove_from_attack_table_white_sweep(p, id, ptype, sq); break;
>  }
>
>  for(i = 0; i < 2; i++)
>  {
>    for(a = (p->at[i][sq+64] >> 16) & p->slideridm[i]; a != 0; a &= a-1) {
>      id = lead_one_32(a);  //shamelessly reusing a variable
>      add_to_attack_table_xray(p, p->at[i], id, pl_ptype(p->pl[i][id]), sq,
>pl_square(p->pl[i][id]));
>    }
>  }
>}
>
>static zforceinline void add_to_attack_table_xray(zpos * RESTRICT p, zappa_u32 *
>RESTRICT tbl, int id, int ptype, int sq, int xsq)
>{
>  int dsq, rindex, ip, bc, cs, i;
>  zappa_u32 mt, xid[2], xtype[6];
>  zappa_u8 *move_table;
>
>  ATTACK_TABLE_PROFILE(at_xray_calls++);
>
>  //What type of ray are we?
>  rindex = compress_gradient[(dsq = grad(xsq, sq)) + 9];
>
>  //Check for early exit.  For example, if we are a Bishop on B7
>  //and "xraying" H1, we clearly have no squares to update.  90% of the time not
>taken,
>  //so it shouldn't hurt the branch predictor too much.
>  if((scan_edge_table[dsq+9] & edge_table[sq]) != 0)
>    return;
>
>  ip = (ptype >= BQUEEN);
>
>  //Check to see if we directly attack the square - this means
>  //we have to scan to find out our type.
>  if((tbl[sq+64] & (tbl[sq+64] >> 16) & (1 << id)) == 0)
>  {
>    //Another early exit case: We attacked through a pawn, e.g. B P p ->
>    //we don't need to do anything at all.
>    if((p->board[sq - dsq] >> 1) == PAWN)
>      return;
>
>    //Prescan
>    for(i = xsq+dsq; i != sq; i += dsq)
>      ip |= (p->board[i] >= BQUEEN);
>
>    ip |= 2; //We know we are xraying something, set xray bit.
>  }
>
>  //OK, we know we are going to have to do some dirty work.
>
>  //Initialize conditions
>  bc = at_xray_add_xsquare[(ip << 6) + (ray_type_table[rindex] << 4) +
>p->board[sq]];
>  cs = *(move_table = xray_slider_move_table[rindex][sq]) & 0x3F;
>
>  //initialize type
>  mt = at_inc(ptype);
>  xtype[0] = mt;  xtype[1] = at_inc(BQUEEN); xtype[2] = mt - at_inc(BQUEEN);
>  xtype[3] = xtype[4] = xtype[5] = 0;
>
>  //initialize ID
>  xid[0] = (1 << (id + 16)) | (1 << id);
>  xid[1] = 1 << (id + 16);
>
>  //Do the actual update
>  do {
>    ATTACK_TABLE_PROFILE(at_xray_squares++);
>    tbl[cs]    += xtype[bc >> 2];
>    tbl[cs+64] |= xid[bc & 1];
>    bc          = (bc << 6) | ((*move_table & 0xC0) >> 2) | p->board[cs];
>    bc          = at_xray_add_decision[bc];
>    move_table += 1;
>    cs          = *move_table & 0x3F;
>  } while((bc & 0x80) == 0);
>}
>
>static zforceinline void remove_from_attack_table_xray(zpos * RESTRICT p,
>zappa_u32 *RESTRICT tbl, int id, int ptype, int sq, int xsq)
>{
>  int dsq, rindex, ip, bc, cs, i;
>  zappa_u32 mt, xid[2], xtype[6];
>  zappa_u8 *move_table;
>
>  ATTACK_TABLE_PROFILE(at_xray_calls++);
>
>  //What type of ray are we?
>  rindex = compress_gradient[(dsq = grad(xsq, sq)) + 9];
>
>  //Check for early exit.  For example, if we are a Bishop on B7
>  //and "xraying" H1, we clearly have no squares to update.  90% of the time not
>taken,
>  //so it shouldn't hurt the branch predictor too much.
>  if((scan_edge_table[dsq+9] & edge_table[sq]) != 0)
>    return;
>
>  ip = (ptype >= BQUEEN);
>
>  //Check to see if we directly attack the square - this means
>  //we have to scan to find out our type.
>  if((tbl[sq+64] & (tbl[sq+64] >> 16) & (1 << id)) == 0)
>  {
>    //Another early exit case: We attacked through a pawn, e.g. B P p ->
>    //we don't need to do anything at all.
>    if((p->board[sq - dsq] >> 1) == PAWN)
>      return;
>
>    //Prescan
>    for(i = xsq+dsq; i != sq; i += dsq)
>      ip |= (p->board[i] >= BQUEEN);
>
>    ip |= 2; //We know we are xraying something, set xray bit.
>  }
>
>  //OK, we know we are going to have to do some dirty work.
>
>  //Initialize conditions
>  bc = at_xray_rem_xsquare[(ip << 6) + (ray_type_table[rindex] << 4) +
>p->board[sq]];
>  cs = *(move_table = xray_slider_move_table[rindex][sq]) & 0x3F;
>
>  //initialize type
>  mt = at_inc(ptype);
>  xtype[0] = mt;  xtype[1] = at_inc(BQUEEN); xtype[2] = mt - at_inc(BQUEEN);
>  xtype[3] = xtype[4] = xtype[5] = 0;
>
>  //initialize ID
>  xid[0] = ~(1 << id);                      //reset lower bit only
>  xid[1] = ~((1 << (id + 16)) | (1 << id)); //reset both bits
>
>  //Do the actual update
>  do {
>    ATTACK_TABLE_PROFILE(at_xray_squares++);
>    tbl[cs]    -= xtype[bc >> 2];
>    tbl[cs+64] &= xid[bc & 1];
>    bc          = (bc << 6) | ((*move_table & 0xC0) >> 2) | p->board[cs];
>    bc          = at_xray_rem_decision[bc];
>    move_table += 1;
>    cs          = *move_table & 0x3F;
>  } while((bc & 0x80) == 0);
>}
>
>//
>//Functions to add a piece to the attack tables
>//
>
>static zforceinline void add_to_attack_table_wpawn(zpos * RESTRICT p, int id,
>int sq)
>{
>	int aid = ((1 << 16) + 1) << id;
>
>  if(!issquare_tr(sq)) {
>    p->at[1][sq+9]    += at_inc(BPAWN);
>    p->at[1][sq+9+64] += aid;
>  }
>
>  if(!issquare_tl(sq)) {
>    p->at[1][sq+7]    += at_inc(BPAWN);
>    p->at[1][sq+7+64] += aid;
>  }
>}
>
>static zforceinline void add_to_attack_table_bpawn(zpos * RESTRICT p, int id,
>int sq)
>{
>	int aid = ((1 << 16) + 1) << id;
>
>  if(!issquare_bl(sq)) {
>    p->at[0][sq-9]    += at_inc(BPAWN);
>    p->at[0][sq-9+64] += aid;
>  }
>
>  if(!issquare_br(sq)) {
>    p->at[0][sq-7]    += at_inc(BPAWN);
>    p->at[0][sq-7+64] += aid;
>  }
>}
>
>static zforceinline void add_to_attack_table_white_sweep(zpos * RESTRICT p, int
>id, int ptype, int sq)
>{
>  int target, cs, bc, pb, mt, _id, _xid;
>  zappa_u32 xtype[5], xid[5];
>  zappa_s8 * RESTRICT move_table;
>
>  ATTACK_TABLE_PROFILE(at_sweep_calls++);
>
>  //Select which list of moves to use, initialize cs to first element and bc to
>zero.
>  bc = 0;
>  cs = *(move_table = mgen_slider_move_table[(ptype >> 1) - BISHOP][sq]);
>
>  _id  = (1 << id) | (1 << (id + 16));
>  _xid = 1 << (id + 16);
>
>  //Build our table on the fly.  This is actually pretty fast: no dependencies,
>high IPC,
>  //and it merges with the prologue anyway. Athlon can do two 32 bit
>stores/cycle, P4 only one.
>  xid[0] = _id;                                    xid[1] = xid[2] = xid[3] =
>xid[4] = _xid;
>  xtype[0] = xtype[1] = xtype[2] = at_inc(ptype);  xtype[3] = xtype[4] =
>at_inc(BQUEEN);
>
>  //1 iteration is 18 cycles on an Athlon XP
>  do {
>    ATTACK_TABLE_PROFILE(at_sweep_squares++);
>    pb               = p->board[cs];
>    mt               = *(move_table+32);
>    p->at[1][cs]    += xtype[bc];
>    p->at[1][cs+64] += xid[bc];
>    bc               = (bc << 6) + (mt & 0x30) + pb;
>    target           = mt & at_scan_skip_decision[bc];
>    move_table      += 1;
>    bc               = at_scan_bc_decision[bc];
>    move_table      += target;
>  } while((cs = *move_table) >= 0);
>}
>
>static zforceinline void add_to_attack_table_black_sweep(zpos * RESTRICT p, int
>id, int ptype, int sq)
>{
>  int target, cs, bc, pb, mt, _id, _xid;
>  zappa_u32 xtype[5], xid[5];
>  zappa_s8 * RESTRICT move_table;
>
>  ATTACK_TABLE_PROFILE(at_sweep_calls++);
>
>  //Select which list of moves to use, initialize cs to first element and bc to
>zero.
>  bc = 0;
>  cs = *(move_table = mgen_slider_move_table[(ptype >> 1) - BISHOP][sq]);
>
>  _id  = (1 << id) | (1 << (id + 16));
>  _xid = 1 << (id + 16);
>
>  //Build our table on the fly.  This is actually pretty fast: no dependencies,
>high IPC,
>  //and it merges with the prologue anyway. Athlon can do two 32 bit
>stores/cycle, P4 only one.
>  xid[0] = _id;                                    xid[1] = xid[2] = xid[3] =
>xid[4] = _xid;
>  xtype[0] = xtype[1] = xtype[2] = at_inc(ptype);  xtype[3] = xtype[4] =
>at_inc(BQUEEN);
>
>  //1 iteration is 18 cycles on an Athlon XP
>  do {
>    ATTACK_TABLE_PROFILE(at_sweep_squares++);
>    pb               = p->board[cs];
>    mt               = *(move_table+32);
>    p->at[0][cs]    += xtype[bc];
>    p->at[0][cs+64] += xid[bc];
>    bc               = (bc << 6) + (mt & 0x30) + pb;
>    target           = mt & at_scan_skip_decision[bc];
>    move_table      += 1;
>    bc               = at_scan_bc_decision[bc];
>    move_table      += target;
>  } while((cs = *move_table) >= 0);
>}
>
>//Sliders are annoying: fixed pieces are much more straightforward
>static zforceinline void add_to_attack_table_fixed(zpos * RESTRICT p, zappa_u32
>* RESTRICT tbl, int id, int ptype, int sq)
>{
>  int cs;
>  zappa_u32 xid, type;
>  zappa_s8 *move_table;
>
>  ATTACK_TABLE_PROFILE(at_fixed_calls++);
>  xid = (1 << id) | (1 << (id + 16));
>  type = at_inc(ptype);
>
>  //initialization
>  move_table = mgen_fixed_move_table[zt_fixed_index[ptype]][sq];
>  cs = *move_table;
>
>  do {
>    ATTACK_TABLE_PROFILE(at_fixed_squares++);
>    tbl[cs]    += type;
>    tbl[cs+64] += xid;
>    move_table++;
>  } while((cs = *move_table) >= 0);
>}
>
>//
>//Functions to remove a piece from the attack tables
>//
>
>static zforceinline void remove_from_attack_table_bpawn(zpos * RESTRICT p, int
>id, int sq)
>{
>  int xid = ~((1 << (id + 16)) + (1 << id));
>
>  if(!issquare_bl(sq)) {
>    p->at[0][sq-9]    -= at_inc(BPAWN);
>    p->at[0][sq-9+64] &= xid;
>  }
>
>  if(!issquare_br(sq)) {
>    p->at[0][sq-7]    -= at_inc(BPAWN);
>    p->at[0][sq-7+64] &= xid;
>  }
>}
>
>static zforceinline void remove_from_attack_table_wpawn(zpos * RESTRICT p, int
>id, int sq)
>{
>  int xid = ~((1 << (id + 16)) + (1 << id));
>
>  if(!issquare_tr(sq)) {
>    p->at[1][sq+9]    -= at_inc(BPAWN);
>    p->at[1][sq+9+64] &= xid;
>  }
>
>  if(!issquare_tl(sq)) {
>    p->at[1][sq+7]    -= at_inc(BPAWN);
>    p->at[1][sq+7+64] &= xid;
>  }
>}
>
>static zforceinline void remove_from_attack_table_white_sweep(zpos * RESTRICT p,
>int id, int ptype, int sq)
>{
>  int target, cs, bc, pb, mt, _id, _xid;
>  zappa_u32 xtype[5], xid[5];
>  zappa_s8 * RESTRICT move_table;
>
>  ATTACK_TABLE_PROFILE(at_sweep_calls++);
>
>
>  //Select which list of moves to use, initialize cs to first element and bc to
>zero.
>  bc = 0;
>  cs = *(move_table = mgen_slider_move_table[(ptype >> 1) - BISHOP][sq]);
>
>  _id  = (1 << id) | (1 << (id + 16));
>  _xid = 1 << (id + 16);
>
>  //Build our table on the fly.  This is actually pretty fast: no dependencies,
>high IPC,
>  //and it merges with the prologue anyway. Athlon can do two 32 bit
>stores/cycle, P4 only one.
>  xid[0] = _id;                                    xid[1] = xid[2] = xid[3] =
>xid[4] = _xid;
>  xtype[0] = xtype[1] = xtype[2] = at_inc(ptype);  xtype[3] = xtype[4] =
>at_inc(BQUEEN);
>
>  //1 iteration is 18 cycles on an Athlon XP
>  do {
>    ATTACK_TABLE_PROFILE(at_sweep_squares++);
>    pb               = p->board[cs];
>    mt               = *(move_table+32);
>    p->at[1][cs]    -= xtype[bc];
>    p->at[1][cs+64] -= xid[bc];
>    bc               = (bc << 6) + (mt & 0x30) + pb;
>    target           = mt & at_scan_skip_decision[bc];
>    move_table      += 1;
>    bc               = at_scan_bc_decision[bc];
>    move_table      += target;
>  } while((cs = *move_table) >= 0);
>}
>
>static zforceinline void remove_from_attack_table_black_sweep(zpos * RESTRICT p,
>int id, int ptype, int sq)
>{
>  int target, cs, bc, pb, mt, _id, _xid;
>  zappa_u32 xtype[5], xid[5];
>  zappa_s8 * RESTRICT move_table;
>
>  ATTACK_TABLE_PROFILE(at_sweep_calls++);
>
>  //Select which list of moves to use, initialize cs to first element and bc to
>zero.
>  bc = 0;
>  cs = *(move_table = mgen_slider_move_table[(ptype >> 1) - BISHOP][sq]);
>
>  _id  = (1 << id) | (1 << (id + 16));
>  _xid = 1 << (id + 16);
>
>  //Build our table on the fly.  This is actually pretty fast: no dependencies,
>high IPC,
>  //and it merges with the prologue anyway. Athlon can do two 32 bit
>stores/cycle, P4 only one.
>  xid[0] = _id;                                    xid[1] = xid[2] = xid[3] =
>xid[4] = _xid;
>  xtype[0] = xtype[1] = xtype[2] = at_inc(ptype);  xtype[3] = xtype[4] =
>at_inc(BQUEEN);
>
>  //1 iteration is 18 cycles on an Athlon XP
>  do {
>    ATTACK_TABLE_PROFILE(at_sweep_squares++);
>    pb               = p->board[cs];
>    mt               = *(move_table+32);
>    p->at[0][cs]    -= xtype[bc];
>    p->at[0][cs+64] -= xid[bc];
>    bc               = (bc << 6) + (mt & 0x30) + pb;
>    target           = mt & at_scan_skip_decision[bc];
>    move_table      += 1;
>    bc               = at_scan_bc_decision[bc];
>    move_table      += target;
>  } while((cs = *move_table) >= 0);
>}
>
>//Sliders are annoying: fixed pieces are much more straightforward
>static zforceinline void remove_from_attack_table_fixed(zpos * RESTRICT p,
>zappa_u32 * RESTRICT tbl, int id, int ptype, int sq)
>{
>  int cs;
>  zappa_u32 xid, type;
>  zappa_s8 *move_table;
>
>  ATTACK_TABLE_PROFILE(at_fixed_calls++);
>
>	xid = (1 << id) | (1 << (id + 16));
>	type = at_inc(ptype);
>
>  //initialization
>  move_table = mgen_fixed_move_table[zt_fixed_index[ptype]][sq];
>  cs = *move_table;
>
>  do {
>    ATTACK_TABLE_PROFILE(at_fixed_squares++);
>    tbl[cs]    -= type;
>    tbl[cs+64] -= xid;
>    move_table++;
>  } while((cs = *move_table) >= 0);
>}
>
>void print_attack_table_profile_info(void)
>{
>#ifdef ATTACK_TABLE_PROFILING
>	printf("Sweep square average: %f\n", (float)((double)(at_sweep_squares) /
>(double)(at_sweep_calls)));
>	printf("Fixed square average: %f\n", (float)((double)(at_fixed_squares) /
>(double)(at_fixed_calls)));
>	printf("Xray  square average: %f\n", (float)((double)(at_xray_squares)  /
>(double)(at_xray_calls)));
>	printf("Sweep Calls: %d\n", (int)at_sweep_calls);
>	printf("Fixed Calls: %d\n", (int)at_fixed_calls);
>	printf("Xray Calls:  %d\n", (int)at_xray_calls);
>#endif
>}



This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.