Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: test results comparing cygwin, msvc binaries (long code attachment)

Author: Dann Corbit

Date: 21:22:09 02/23/01

Go up one level in this thread


On February 24, 2001 at 00:01:33, Dan Newman wrote:
[snip]
>Heh.  I bought the Intel compiler (version 5.0) on the basis of reports of
>better performance than MSVC, but on my program I get a 24% hit instead...
>I imagine what may happen is that if you optimize for one compiler, you
>end up doing worse on another.  Generally I've found MSVC to be 20-30%
>better than anything else I've tried (Watcom, gcc, Intel).

Which brings up another point --
If you are thinking of buying this compiler, keep this in mind:
1.  It *requires* Microsoft C++ Professional or Enterprise edition
2.  You can try it out for free for one month.

>(It could be that I just haven't found the right switches yet.)
>
>The Intel compiler did find a few minor "bugs" that have been slipping past
>MSVC--got 4 warnings that really needed fixing--so it's not a total loss :).

It does have some excellent diagnostics.  I have found that running my code
through as many compilers as possible as well as PC-Lint and LCLint will turn up
little nagging things that need attention each time.

For floating point performance, the Intel compiler really shines.  It is
sometimes twice as fast as any other in my possession.
Try this code with your favorite compilers, and you will find that Intel crushes
the opposition:

/*
 * --------------------------------------------------------------
 * TEST_FPU  A number-crunching benchmark using matrix inversion.
 * Implemented by:   David Frank  DaveGemini@aol.com
 * Gauss routine by: Tim Prince   N8TM@aol.com
 * Crout routine by: James Van Buskirk  torsop@ix.netcom.com
 * F90->C source by: Sergey N. Voronkov serg@ggd.nsu.ru
 * rgaussi is due to Dieter Buerssner buers@gmx.de
 * --------------------------------------------------------------
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>

/*
 * Compiling with NI = 1000 (default) generates pool(51,51,1000) = 20mb. If
 * test system pages due to insufficient memory (disk i/o activity occurs),
 * abort run and compile with NI = 200, benchmark will adjust time x 5.
 */

#define NI 1000
#define NN 51
#define RK8 double


/* below are additional C routines supplied by translator */

void            memflt()
{
    fputs("Memory allocation error\n", stderr);
    exit(EXIT_FAILURE);
}


void            alloc_arrays(RK8 ** p[NI], RK8 *** a, RK8 *** b)
{
    int             i,
                    j;

    for (i = 0; i < NI; i++) {
        if ((p[i] = (RK8 **) malloc(NN * sizeof(RK8 *))) == NULL)
            memflt();
        for (j = 0; j < NN; j++)
            if ((p[i][j] = (RK8 *) malloc(NN * sizeof(RK8))) == NULL)
                memflt();
    }
    if ((*a = (RK8 **) malloc(NN * sizeof(RK8 *))) == NULL ||
        (*b = (RK8 **) malloc(NN * sizeof(RK8 *))) == NULL)
        memflt();
    for (i = 0; i < NN; i++)
        if (((*a)[i] = (RK8 *) malloc(NN * sizeof(RK8))) == NULL ||
            ((*b)[i] = (RK8 *) malloc(NN * sizeof(RK8))) == NULL)
            memflt();
}


void            random_number(RK8 ** pool[NI])
{
    int             i,
                    j,
                    k;

    for (i = 0; i < NI; i++)
        for (j = 0; j < NN; j++)
            for (k = 0; k < NN; k++)
                pool[i][j][k] = (RK8) (rand()) / RAND_MAX;
}


RK8
timesec()
{
    return (RK8) (clock()) / CLOCKS_PER_SEC;
}


/* prototype the invert functions that follow exec source */
void            Gauss(RK8 ** a, RK8 ** b, int n);
void            Crout(RK8 ** a, RK8 ** b, int n);
int             rgaussi(RK8 ** a, RK8 ** b, int n);

int             main()
{

    RK8           **pool[NI];   /* pool of matrices to invert */
    RK8           **a,
                  **ai;         /* working matrices use < 256k */
    RK8             avg_err,
                    total_time,
                    time1;
    int             i,
                    j,
                    n;

    char           *revision = "01/10/98";      /* Gauss speedup mod */
    char            invert_id[3][6] =
    {
    "Gauss", "Crout", "Dieter"};

    struct tm      *ptm;
    time_t          crtime;
    FILE           *fp;

    /* Begin by allocating matrix arrays */
    alloc_arrays(pool, &a, &ai);

    puts("Benchmark running, hopefully as only ACTIVE task");

    if ((fp = fopen("test_fpc.dat", "w")) == NULL) {
        fprintf(stderr, "Can't open output file!\n");
        return EXIT_FAILURE;
    }
    crtime = time(NULL);
    ptm = gmtime(&crtime);

    fprintf(fp, "Date run = %2d/%2d/%2d\n",
            ptm->tm_mon + 1, ptm->tm_mday, ptm->tm_year);

    fputs("Pls supply info below, send to DaveGemini@aol.com\n"
          "Tester name/ net address = \n"
          "CPU mfg/id/Mhz = \n"
          "MEM/CACHE = \n"
          "O.S. / COMPILER = \n"
          "Additional comments = \n\n\n\n\n", fp);

    fprintf(fp, "Results for %s revision using TEST_FPU.C \n", revision);

    srand(time(NULL));          /* set seed to random number based on time */
    random_number(pool);        /* fill pool with random data ( 0. -> 1. ) */

    for (n = 0; n < 3; n++) {   /* for Gauss,Crout algorithms */
        time1 = timesec();      /* start benchmark n time */

        for (i = 0; i < NI; i++) {
            /* get next matrix to invert */
            for (j = 0; j < NN; j++)
                memcpy(a[j], pool[i][j], sizeof(RK8) * NN);

            switch (n) {
            case 0:
                Gauss(a, ai, NN);       /* invert a -> ai ; destructs a */

                Gauss(ai, a, NN);       /* invert ai -> a */
                break;
            case 1:
                Crout(a, ai, NN);       /* invert a -> ai ; nondestructs a */

                Crout(ai, a, NN);       /* invert ai -> a */
                break;
            case 2:
                rgaussi(a, ai, NN);       /* invert a -> ai ; nondestructs a */

                rgaussi(ai, a, NN);       /* invert ai -> a */
                break;
            }
        }

        total_time = timesec() - time1; /* = elapsed time sec. */

        /* check accuracy last matrix invert. */
        avg_err = 0;
        for (i = 0; i < NN; i++)
            for (j = 0; j < NN; j++)
                avg_err += fabs(a[i][j] - pool[NI - 1][i][j]);

        if (NI == 200)
            fprintf(fp, "\n%s 5 x 200 x 2 inverts = %6.1f sec.\n",
                    invert_id[n], 5 * total_time);
        else
            fprintf(fp, "\n%s 1000 x 2 inverts = %6.1f sec.\n",
                    invert_id[n], total_time);

        fputs("Accuracy of 2 computed numbers\n", fp);
        fprintf(fp, "Original = %18.15f %18.15f\n",
                pool[NI - 1][0][0], pool[NI - 1][NN - 1][NN - 1]);
        fprintf(fp, "Computed = %18.15f %18.15f\n",
                a[0][0], a[NN - 1][NN - 1]);
        fprintf(fp, "Avg Err. = %18.15f\n", avg_err / (NN * NN));

    }                           /* for Gauss,Crout algorithms */

    puts("Results written to: TEST_FPC.DAT");

    return EXIT_SUCCESS;
}


/*
 * --------------------------------------
 * Invert matrix a -> b  by Gauss method
 * --------------------------------------
 */
void            Gauss(RK8 ** a, RK8 ** b, int n)
{
    RK8             d,
                    temp = 0,
                    c;
    int             i,
                    j,
                    k,
                    m,
                    nn,
                   *ipvt;

    if ((ipvt = (int *) malloc(n * sizeof(int))) == NULL)
        memflt();

    nn = n;
    for (i = 0; i < nn; i++)
        ipvt[i] = i;

    for (k = 0; k < nn; k++) {
        temp = 0.;
        m = k;
        for (i = k; i < nn; i++) {
            d = a[k][i];
            if (fabs(d) > temp) {
                temp = fabs(d);
                m = i;
            }
        }
        if (m != k) {
            j = ipvt[k];
            ipvt[k] = ipvt[m];
            ipvt[m] = j;
            for (j = 0; j < nn; j++) {
                temp = a[j][k];
                a[j][k] = a[j][m];
                a[j][m] = temp;
            }
        }
        d = 1 / a[k][k];
        for (j = 0; j < k; j++) {
            c = a[j][k] * d;
            for (i = 0; i < nn; i++)
                a[j][i] -= a[k][i] * c;
            a[j][k] = c;
        }
        for (j = k + 1; j < nn; j++) {
            c = a[j][k] * d;
            for (i = 0; i < nn; i++)
                a[j][i] -= a[k][i] * c;
            a[j][k] = c;
        }
        for (i = 0; i < nn; i++)
            a[k][i] = -a[k][i] * d;
        a[k][k] = d;
    }

    for (i = 0; i < nn; i++)
        memcpy(b[ipvt[i]], a[i], sizeof(RK8) * nn);

    free(ipvt);
}


/*
 * --------------------------------------
 * Invert matrix a -> b  by Crout method
 * --------------------------------------
 */
void            Crout(RK8 ** a, RK8 ** b, int n)
{
    int             i,
                    j;          /* Current row & column */
    int             maxlok;     /* Location of maximum pivot */
    int            *index;      /* Partial pivot record */
    RK8            *temp = 0,
                    the_max;
    RK8             tmp,
                   *ptr;
    RK8            *matr = 0;
    int             k,
                    ind,
                    ind2;

    if ((index = (int *) malloc(n * sizeof(int))) == NULL ||
        (temp = (RK8 *) malloc(n * sizeof(RK8))) == NULL ||
        (matr = (RK8 *) malloc(n * n * sizeof(RK8))) == NULL)
        memflt();

    /* Initialize everything */

    for (i = 0; i < n; i++)
        index[i] = i;

    /* Shuffle matrix */
    for (j = 0; j < n; j++) {
        for (i = 0; i < j; i++)
            b[j][i] = a[j][i];
        for (i = j; i < n; i++)
            b[j][i] = a[i - j][n - j - 1];
    }

    /* LU decomposition; reciprocals of diagonal elements in L matrix */
    for (j = 0; j < n; j++) {
        /* Get current column of L matrix */
        for (i = j; i < n; i++) {
            tmp = 0;
            ind = n - i - 1;
            for (k = 0; k < j; k++)
                tmp += b[ind][ind + k] * b[j][k];
            b[ind][ind + j] -= tmp;
        }
        maxlok = 0;
        the_max = fabs(b[0][j]);
        for (i = 1; i < n - j; i++)
            if (fabs(b[i][j + i]) >= the_max) {
                the_max = fabs(b[i][j + i]);
                maxlok = i;
            }
        /* det = det*b(j+maxlok-1,maxlok) */
        b[maxlok][j + maxlok] = 1 / b[maxlok][j + maxlok];

        /* Swap biggest element to current pivot position */
        if (maxlok + 1 != n - j) {
            ind = n - maxlok - 1;
            ind2 = index[j];
            index[j] = index[ind];
            index[ind] = ind2;
            for (k = n - maxlok; k < n; k++) {
                tmp = b[k][j];
                b[k][j] = b[k][ind];
                b[k][ind] = tmp;
            }
            memcpy(temp, &(b[maxlok][maxlok]), sizeof(RK8) * (n - maxlok));
            ptr = &(b[n - j - 1][n - j - 1]);
            memmove(&(b[maxlok][maxlok]), ptr, sizeof(RK8) * (j + 1));
            for (k = j + 1; k < n - maxlok; k++)
                b[maxlok][maxlok + k] = b[k][j];
            memcpy(ptr, temp, (j + 1) * sizeof(RK8));
            for (k = j + 1; k < n - maxlok; k++)
                b[k][j] = temp[k];
        }
        /* Get current row of U matrix */
        ind = n - j - 1;
        for (i = j + 1; i < n; i++) {
            tmp = 0.;
            for (k = 0; k < j; k++)
                tmp += b[i][k] * b[ind][ind + k];
            b[i][j] = b[ind][n - 1] * (b[i][j] - tmp);
        }
    }                           /* END DO LU_outer */

    /* Invert L matrix */
    for (j = 0; j < n - 1; j++) {
        temp[0] = b[n - j - 1][n - 1];
        for (i = j + 1; i < n; i++) {
            ind = n - i - 1;
            tmp = 0.;
            for (k = 0; k < i - j; k++)
                tmp += temp[k] * b[ind][ind + j + k];
            b[ind][ind + j] = -tmp * b[ind][n - 1];
            temp[i - j] = b[ind][ind + j];
        }
    }

    /* Reshuffle matrix */
    for (i = 0; i < (n + 1) / 3; i++) {
        memcpy(temp, &(b[i][2 * (i + 1) - 1]), sizeof(RK8) * (n + 2 - 3 * (i +
1)));
        for (j = 2 * (i + 1) - 1; j < n - i; j++)
            b[i][j] = b[n - j - 1][n - j + i - 1];
        ind = n - i - 1;
        for (j = i; j < n + 1 - 2 * (i + 1); j++)
            b[j][i + j] = b[n - i - j - 1][ind];
        for (k = 0; k < n + 2 - 3 * (i + 1); k++)
            b[i + 1 + k][ind] = temp[k];
    }

    /* Invert U matrix */
    for (i = 0; i < n - 1; i++) {
        for (j = i + 1; j < n; j++) {
            tmp = 0.;
            for (k = 0; k < j - i - 1; k++)
                tmp += temp[k] * b[j][i + 1 + k];
            b[j][i] = -b[j][i] - tmp;
            temp[j - i - 1] = b[j][i];
        }
    }

    /* Multiply inverses in reverse order */
    for (i = 0; i < n - 1; i++) {
        for (k = 0; k < n - i - 1; k++)
            temp[k] = b[i + 1 + k][i];
        for (j = 0; j <= i; j++) {
            tmp = 0.;
            for (k = 0; k < n - i - 1; k++)
                tmp += temp[k] * b[j][i + 1 + k];
            b[j][i] += tmp;
        }
        for (j = i + 1; j < n; j++) {
            tmp = 0.;
            for (k = j; k < n; k++)
                tmp += temp[k - i - 1] * b[j][k];
            b[j][i] = tmp;
        }
    }

    /* Straighten out the columns of the result */
    for (i = 0; i < n; i++)
        memcpy(matr + n * i, b[i], sizeof(RK8) * n);
    for (i = 0; i < n; i++)
        memcpy(b[index[i]], matr + n * i, sizeof(RK8) * n);

    free(index);
    free(temp);
    free(matr);
}

/*
** This routine is due to buers@gmx.de (Dieter Buerssner)
** Destroys a, return 0: success, 1: zero pivot, 2: out of mem.
*/
int             rgaussi(RK8 ** a, RK8 ** b, int n)
{
    int             i,
                    j,
                    k,
                    maxj,
                    t;
    RK8             maxel,
                    pivinv,
                    tmaxel,
                    aji;
    RK8            *tp,
                   *ai,
                   *aj;
    /* C99: int ipiv[n]; */
    int            *ipiv = malloc(n * sizeof *ipiv);

    if (ipiv == NULL)
        return 2;
    for (i = 0; i < n; i++)
        ipiv[i] = i;
    for (i = 0; i < n; i++) {
        maxj = -1;
        maxel = 0.0;
        /* find pivot element */
        for (j = i; j < n; j++)
            if ((tmaxel = fabs(a[j][i])) > maxel) {
                maxj = j;
                maxel = tmaxel;
            }
        if (maxj < 0) {
            free(ipiv);
            return 1;
        }
        /* exchange rows */
        if (maxj != i) {
            /* just exchange pointers for a */
            tp = a[maxj];
            a[maxj] = a[i];
            a[i] = tp;
            t = ipiv[maxj];
            ipiv[maxj] = ipiv[i];
            ipiv[i] = t;
        }
        ai = a[i];
        pivinv = 1.0 / ai[i];
        ai[i] = 1.0;
        for (k = 0; k < n; k++)
            ai[k] *= pivinv;
        for (j = 0; j < n; j++) {
            if (j != i) {
                aj = a[j];
                aji = aj[i];
                aj[i] = 0.0;
                for (k = 0; k < n; k++)
                    aj[k] -= aji * ai[k];
            }
        }
    }
    for (i = 0; i < n; i++)
        for (j = 0; j < n; j++)
            b[i][ipiv[j]] = a[i][j];
    free(ipiv);
    return 0;
}



This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.