Author: Dann Corbit
Date: 17:17:31 07/17/03
Go up one level in this thread
On July 17, 2003 at 17:37:38, Russell Reagan wrote:
>On July 17, 2003 at 13:42:07, Dann Corbit wrote:
>
>>Bitboard lo_bit1(Bitboard n)
>>{
>> return n & -n;
>>}
>
>Why would any of the other functions (or any assembly code) be faster than this?
>This looks pretty "bare bones" to me.
The integers are 64 bits wide, so on a 32-bit target the compiler could issue a
large number of instructions. In practice, the assembly that is emitted is
pretty spartan:
C:\tmp>cl /Ox /G6 /Fa /c lobit.c
Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 12.00.8804 for 80x86
Copyright (C) Microsoft Corp 1984-1998. All rights reserved.
lobit.c
C:\tmp>type lobit.asm
; Listing generated by Microsoft (R) Optimizing Compiler Version 12.00.9044.0
TITLE lobit.c
.386P
include listing.inc
if @Version gt 510
.model FLAT
else
_TEXT SEGMENT PARA USE32 PUBLIC 'CODE'
_TEXT ENDS
_DATA SEGMENT DWORD USE32 PUBLIC 'DATA'
_DATA ENDS
CONST SEGMENT DWORD USE32 PUBLIC 'CONST'
CONST ENDS
_BSS SEGMENT DWORD USE32 PUBLIC 'BSS'
_BSS ENDS
$$SYMBOLS SEGMENT BYTE USE32 'DEBSYM'
$$SYMBOLS ENDS
_TLS SEGMENT DWORD USE32 PUBLIC 'TLS'
_TLS ENDS
FLAT GROUP _DATA, CONST, _BSS
ASSUME CS: FLAT, DS: FLAT, SS: FLAT
endif
INCLUDELIB LIBC
INCLUDELIB OLDNAMES
PUBLIC _lo_bit1
; Function compile flags: /Ogty
_TEXT SEGMENT
_n$ = 8
; _lo_bit1: computes n & -n on a 64-bit argument using 32-bit registers.
; The argument is read from the stack as two dwords; the low half of the
; result is left in eax and the high half in edx.
; ecx = low dword of n, esi = high dword of n (esi is saved and restored).
_lo_bit1 PROC NEAR
; File lobit.c
; Line 5
mov ecx, DWORD PTR _n$[esp-4] ; _n$ = 8, so this reads [esp+4]: low dword of n
push esi ; esi is overwritten below; saved here and restored before ret
mov esi, DWORD PTR _n$[esp+4] ; [esp+12] after the push, i.e. the dword after the low one: high dword of n
mov eax, ecx
neg eax ; eax = -low; CF is set when low != 0
mov edx, esi ; mov leaves CF from the neg above intact
adc edx, 0 ; edx = high + CF, folding the carry out of the low-half negate into the high half
neg edx ; edx = -(high + CF): high dword of the 64-bit -n
and edx, esi ; high dword of n & -n
and eax, ecx ; low dword of n & -n
pop esi
; Line 6
ret 0 ; callee removes no argument bytes from the stack
_lo_bit1 ENDP
_TEXT ENDS
END
C:\tmp>icl /O3 /G6 /Fa /c lobit.c
Intel(R) C++ Compiler for 32-bit applications, Version 7.1 Build 20030609Z
Copyright (C) 1985-2003 Intel Corporation. All rights reserved.
lobit.c
C:\tmp>type lobit.asm
; -- Machine type PA
; mark_description "Intel(R) C++ Compiler for 32-bit applications, Version 7.1 Build 20030609Z";
; mark_description "-Qvc7 -Qlocation,link,C:\\Program Files\\Microsoft Visual Studio .NET\\Vc7\\Bin -O3 -G6 -Fa -c";
;ident "Intel(R) C++ Compiler for 32-bit applications, Version 7.1 Build 20030609Z"
;ident "-Qvc7 -Qlocation,link,C:\Program Files\Microsoft Visual Studio .NET\Vc7\Bin -O3 -G6 -Fa -c"
.486P
.387
_TEXT SEGMENT DWORD PUBLIC USE32 'CODE'
_TEXT ENDS
_DATA SEGMENT DWORD PUBLIC USE32 'DATA'
ALIGN 004H
_DATA ENDS
_BSS SEGMENT DWORD PUBLIC USE32 'BSS'
ALIGN 004H
_BSS ENDS
_RDATA SEGMENT DWORD PUBLIC USE32 'DATA'
ALIGN 004H
_RDATA ENDS
_TLS SEGMENT DWORD PUBLIC USE32 'TLS'
ALIGN 004H
_TLS ENDS
_DATA1 SEGMENT DWORD PUBLIC USE32 'DATA'
ALIGN 004H
_DATA1 ENDS
_TEXT1 SEGMENT DWORD PUBLIC USE32 'CODE'
ALIGN 004H
_TEXT1 ENDS
ASSUME CS:FLAT,DS:FLAT,SS:FLAT
_DATA SEGMENT DWORD PUBLIC USE32 'DATA'
_DATA ENDS
_TEXT SEGMENT DWORD PUBLIC USE32 'CODE'
; COMDAT _lo_bit1
; -- Begin _lo_bit1
; mark_begin;
IF @Version GE 612
.MMX
MMWORD TEXTEQU <QWORD>
ENDIF
IF @Version GE 614
.XMM
XMMWORD TEXTEQU <OWORD>
ENDIF
ALIGN 4
PUBLIC _lo_bit1
; _lo_bit1: computes n & -n on a 64-bit argument using 32-bit registers.
; The argument is read directly from [esp+4] (low dword) and [esp+8]
; (high dword); no registers other than eax and edx are written.
; The low half of the result is left in eax and the high half in edx.
; NOTE(review): the trailing ";5.17" annotations look like source
; line.column markers from the compiler -- confirm against lobit.c.
_lo_bit1 PROC NEAR
; parameter 1: 4 + esp
.B1.1: ; Preds .B1.0
; Zero edx:eax so the subtract below yields 0 - n.
xor eax, eax ;5.17
xor edx, edx ;5.17
; 64-bit negate: sub produces the low dword and a borrow in CF,
; sbb consumes that borrow while producing the high dword.
sub eax, DWORD PTR [esp+4] ;5.17
sbb edx, DWORD PTR [esp+8] ;5.17
; AND each half of -n with the matching half of n.
and eax, DWORD PTR [esp+4] ;5.17
and edx, DWORD PTR [esp+8] ;5.17
ret ;5.17
ALIGN 4
; LOE
; mark_end;
_lo_bit1 ENDP
;_lo_bit1 ENDS
_TEXT ENDS
_DATA SEGMENT DWORD PUBLIC USE32 'DATA'
_DATA ENDS
; -- End _lo_bit1
_DATA SEGMENT DWORD PUBLIC USE32 'DATA'
_DATA ENDS
END
That's on the one hand. On the other hand, BSF/BSR or MMX instructions are
probably still faster if you just count the cycles.
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.