Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: what is the meaning of RESTRICT in crafty?

Author: Dieter Buerssner

Date: 12:23:40 12/18/03

Go up one level in this thread


On December 18, 2003 at 02:35:02, Tony Werten wrote:

>In this case it will work but I don't think the compiler will do that without
>RESTRICT.

See the example below. I see no reason why it shouldn't. The other optimization I
mentioned would not be possible (and would not be done, of course).

>Problems could arise if c points to somewhere in the a or b array.

I don't think so. The method of my manual unrolling will work in any case the
original code worked. If c aliases a or b, the original code probably did not
work as intended, but the unrolled code will give the same result. A test case:
foo is there just to give the compiler a chance to inline the call, so you can
see that everything got unrolled (the same happens if it is called with len=10).

C:\src>gcc -O3 -S -funroll-loops d.c

C:\src>cat d.c
void vectoradd(double *a, double *b, double *c, unsigned len)
{
  unsigned i;
  for(i = 0; i < len; i++)
    c[i+1] = b[i] + a[i];
}

void foo(double *a, double *b, double *c)
{
  vectoradd(a,b,c,4);
}

C:\src>gcc -O3 -S -funroll-loops d.c

C:\src>cat d.s
	.file	"d.c"
	.section .text
	.p2align 1
	.p2align 4,,15
.globl _vectoradd
_vectoradd:
	pushl	%ebp
	xorl	%edx, %edx
	movl	%esp, %ebp
	pushl	%edi
	pushl	%esi
	movl	20(%ebp), %edi
	pushl	%ebx
	movl	8(%ebp), %esi
	movl	16(%ebp), %ecx
	movl	12(%ebp), %ebx
	cmpl	%edi, %edx
	jae	L8
	movl	%edi, %eax
	andl	$3, %eax
	cmpl	$1, %edi
	ja	L28
L11:
	fldl	(%esi,%edx,8)
	faddl	(%ebx,%edx,8)
	fstpl	8(%ecx,%edx,8)
	incl	%edx
	cmpl	%edi, %edx
	jae	L8
; loop unrolled by four
	.p2align 4,,7
L6:
	fldl	(%esi,%edx,8)
	leal	1(%edx), %eax
	faddl	(%ebx,%edx,8)
	fstpl	8(%ecx,%edx,8)
	fldl	(%esi,%eax,8)
	faddl	(%ebx,%eax,8)
	fstpl	8(%ecx,%eax,8)
	leal	2(%edx), %eax
	fldl	(%esi,%eax,8)
	faddl	(%ebx,%eax,8)
	fstpl	8(%ecx,%eax,8)
	leal	3(%edx), %eax
	addl	$4, %edx
	cmpl	%edi, %edx
	fldl	(%esi,%eax,8)
	faddl	(%ebx,%eax,8)
	fstpl	8(%ecx,%eax,8)
	jb	L6
L8:
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
L28:
	testl	%eax, %eax
	je	L6
	cmpl	$1, %eax
	jle	L11
	cmpl	$2, %eax
	jle	L12
	fldl	(%esi)
	movl	$1, %edx
	faddl	(%ebx)
	fstpl	8(%ecx)
L12:
	fldl	(%esi,%edx,8)
	faddl	(%ebx,%edx,8)
	fstpl	8(%ecx,%edx,8)
	incl	%edx
	jmp	L11
	.p2align 1
	.p2align 4,,15
.globl _foo
_foo:
	pushl	%ebp
	movl	%esp, %ebp
	movl	8(%ebp), %eax
	movl	12(%ebp), %edx
	movl	16(%ebp), %ecx
; everything unrolled
	fldl	(%eax)
	faddl	(%edx)
	fstpl	8(%ecx)
	fldl	8(%eax)
	faddl	8(%edx)
	fstpl	16(%ecx)
	fldl	16(%eax)
	faddl	16(%edx)
	fstpl	24(%ecx)
	fldl	24(%eax)
	faddl	24(%edx)
	fstpl	32(%ecx)
	popl	%ebp
	ret
	.ident	"GCC: (GNU) 3.2"

Regards,
Dieter




This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.