Computer Chess Club Archives


Search

Terms

Messages

Subject: Re: crafty 19.7 compile error

Author: Mike Byrne

Date: 18:30:44 12/13/03

Go up one level in this thread




I modified this code snippet in utility.c - essentially making it
un-NUMA-aware while keeping SMP.
====================================================================

/*
********************************************************************************
*                                                                              *
*   Windows NUMA support                                                       *
*                                                                              *
********************************************************************************
*/


#if (defined(_WIN32) || defined(_WIN64)) && defined(SMP)


// Lock held by WinNumaInit() while it performs its one-time setup.
lock_t ThreadsLock;

// Setup state: fThreadsInitialized is set by WinNumaInit() so the setup runs
// only once; fSystemIsNUMA records whether that setup found a NUMA topology.
static volatile BOOL fThreadsInitialized = FALSE;
static BOOL fSystemIsNUMA = FALSE;

// Topology data. ulNumaNodes holds the highest node number reported by
// GetNumaHighestNodeNumber (capped at 255 by WinNumaInit), so the table of
// per-node CPU masks needs 256 entries. ulNumaNode is the node cursor used by
// the NUMA path of NumaStartThread.
static ULONG ulNumaNodes;
static ULONG ulNumaNode = 0;
static ULONGLONG ullProcessorMask[256];

// kernel32 entry points, looked up with GetProcAddress in WinNumaInit().
static DWORD (WINAPI *pSetThreadIdealProcessor)  (HANDLE, DWORD);
static BOOL  (WINAPI *pGetNumaNodeProcessorMask) (UCHAR, PULONGLONG);
static BOOL  (WINAPI *pGetNumaHighestNodeNumber) (PULONG);

// Get NUMA-related information from Windows
#if defined(NUMA)
// One-time discovery of the Windows NUMA topology.
//
// Looks up GetNumaHighestNodeNumber, GetNumaNodeProcessorMask and
// SetThreadIdealProcessor in kernel32 at run time, stores the CPU mask of each
// node in ullProcessorMask[], and leaves fSystemIsNUMA set to TRUE only when
// Windows reports a highest node number above 0 and every node has a non-empty
// CPU mask. Progress is reported with printf.
//
// The work is done at most once: callers that find fThreadsInitialized already
// set return immediately, everybody else serialises on ThreadsLock.
static void WinNumaInit (void) {
  HMODULE hModule;
  ULONG ulCPU, ulNode;
  ULONGLONG ullMask;
  DWORD dwCPU;

  if (!fThreadsInitialized) {
    Lock(ThreadsLock);
    if (!fThreadsInitialized) {
      printf ("\nInitializing multiple threads.\n");
      hModule = GetModuleHandle("kernel32");
      pGetNumaHighestNodeNumber =
          (void*) GetProcAddress(hModule, "GetNumaHighestNodeNumber");
      pGetNumaNodeProcessorMask =
          (void*) GetProcAddress(hModule, "GetNumaNodeProcessorMask");
      pSetThreadIdealProcessor  =
          (void*) GetProcAddress(hModule, "SetThreadIdealProcessor");
      if (pGetNumaHighestNodeNumber && pGetNumaNodeProcessorMask &&
          pGetNumaHighestNodeNumber(&ulNumaNodes) && (ulNumaNodes > 0)) {
        fSystemIsNUMA = TRUE;
        // ullProcessorMask[] has 256 entries, i.e. node numbers 0..255.
        if (ulNumaNodes > 255) ulNumaNodes = 255;
        // ULONG and DWORD are unsigned long, hence %lu.
        printf ("System is NUMA. %lu nodes reported by Windows\n",
                ulNumaNodes+1);
        for (ulNode = 0; ulNode <= ulNumaNodes; ulNode ++) {
          pGetNumaNodeProcessorMask((UCHAR) ulNode, &ullProcessorMask[ulNode]);
          printf ("Node %lu CPUs: ", ulNode);
          ullMask = ullProcessorMask[ulNode];
          // A node without CPUs cannot host a thread: fall back to plain SMP.
          if (0 == ullMask) fSystemIsNUMA = FALSE;
          else {
            // Print the index of every set bit in the mask.
            ulCPU = 0;
            do {
              if (ullMask & 1) printf ("%lu ", ulCPU);
              ulCPU ++;
              ullMask >>= 1;
            } while (ullMask);
          }
          printf ("\n");
        }
        // Thread 0 was already started on some CPU. To simplify things further,
        // exchange ullProcessorMask[0] and ullProcessorMask[node for that CPU],
        // so ullProcessorMask[0] would always be node for thread 0.
        // The lookup of SetThreadIdealProcessor may have failed, so only use
        // the pointer when it was resolved.
        if (pSetThreadIdealProcessor) {
          // Passing MAXIMUM_PROCESSORS queries the ideal CPU (see the
          // SetThreadIdealProcessor documentation); the value is then
          // re-applied explicitly.
          dwCPU = pSetThreadIdealProcessor(GetCurrentThread(),
                                           MAXIMUM_PROCESSORS);
          printf ("Current ideal CPU is %lu\n", dwCPU);
          pSetThreadIdealProcessor(GetCurrentThread(), dwCPU);
          if ((((DWORD) -1) != dwCPU) &&
              (MAXIMUM_PROCESSORS != dwCPU) &&
              !(ullProcessorMask[0] & (1ui64 << dwCPU))) {
            for (ulNode = 1; ulNode <= ulNumaNodes; ulNode ++) {
              if (ullProcessorMask[ulNode] & (1ui64 << dwCPU)) {
                printf ("Exchanging nodes 0 and %lu\n", ulNode);
                ullMask = ullProcessorMask[ulNode];
                ullProcessorMask[ulNode] = ullProcessorMask[0];
                ullProcessorMask[0] = ullMask;
                break;
              }
            }
          }
        }
      }
      else printf ("System is SMP, not NUMA.\n");
      // Publish the flag only after fSystemIsNUMA and ullProcessorMask[] are
      // final, so a caller that passes the unlocked check above never sees a
      // half-built topology.
      // NOTE(review): this relies on the volatile store not being reordered
      // before the writes above - confirm for the compiler in use.
      fThreadsInitialized = TRUE;
    }
    Unlock(ThreadsLock);
  }
}

#endif
// Start a thread. On a NUMA system, also set its affinity.

// Start a new thread running func(args) and return its handle.
//
// func  - thread entry point, handed to _beginthreadex unchanged
// args  - argument passed to the entry point
//
// Returns the value of _beginthreadex converted to HANDLE.
//
// The NUMA-aware path is compiled only when NUMA is defined, the same switch
// that guards WinNumaInit(). Without NUMA the thread is started directly with
// no affinity handling.
pthread_t NumaStartThread(void * func, void * args) {
  HANDLE hThread;

#if defined(NUMA)
  WinNumaInit();
  if (fSystemIsNUMA) {
    ULONGLONG ullMask;

    // Hand out nodes round-robin, wrapping after the highest node number.
    ulNumaNode ++;
    if (ulNumaNode > ulNumaNodes) ulNumaNode = 0;
    ullMask = ullProcessorMask[ulNumaNode];
    printf ("Starting thread on node %lu CPU mask %I64u\n", ulNumaNode, ullMask);
    // The creating thread is moved to the target node while the new thread is
    // created (presumably so its initial allocations land on that node), then
    // moved back to the mask of node 0.
    SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR) ullMask);
    hThread = (HANDLE) _beginthreadex(0,0,func,args,CREATE_SUSPENDED,0);
    SetThreadAffinityMask(hThread, (DWORD_PTR) ullMask);
    ResumeThread(hThread);
    SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR) ullProcessorMask[0]);
    return hThread;
  }
#endif
  hThread = (HANDLE) _beginthreadex(0,0,func,args,0,0);
  return hThread;
}

// Allocate memory for thread #N

// Allocate cbBytes of memory for thread number iThread.
//
// cbBytes - number of bytes requested
// iThread - thread number; only used by the NUMA path to pick a node
//
// Returns a pointer to the memory, or NULL when the allocation fails.
//
// The NUMA-aware path is compiled only when NUMA is defined, the same switch
// that guards WinNumaInit(). Without NUMA this is a plain malloc().
void * WinMalloc(size_t cbBytes, int iThread) {
#if defined(NUMA)
  WinNumaInit();
  if (fSystemIsNUMA) {
    HANDLE hThread;
    DWORD_PTR dwAffinityMask;
    void *pBytes;
    ULONG ulNode;

    // Run on the target node while the pages are committed and touched, then
    // restore the caller's previous affinity.
    ulNode = iThread % (ulNumaNodes+1);
    hThread = GetCurrentThread();
    dwAffinityMask = SetThreadAffinityMask(hThread,
                                           (DWORD_PTR) ullProcessorMask[ulNode]);
    pBytes = VirtualAlloc(NULL, cbBytes, MEM_COMMIT, PAGE_READWRITE);
    // Do not write through a failed allocation; report it as NULL instead.
    if (pBytes != NULL)
      memset (pBytes, 0, cbBytes);
    SetThreadAffinityMask(hThread, dwAffinityMask);
    return pBytes;
  }
#else
  (void) iThread;   // only the NUMA path needs the thread number
#endif
  return malloc(cbBytes);
}

// Allocate interleaved memory

// Allocate cbBytes of zero-filled memory with VirtualAlloc.
//
// cbBytes  - number of bytes requested
// cThreads - number of threads; only used by the NUMA path
//
// Returns the base address of the region. A failed allocation terminates the
// process, so the function never returns NULL.
//
// The NUMA-aware path is compiled only when NUMA is defined, the same switch
// that guards WinNumaInit(). It reserves the whole region and then, for each
// thread index, commits and touches every cThreads-th page while the calling
// thread's affinity is set to the mask of node (index % node count). Without
// NUMA the region is committed and cleared in one piece.
void * WinMallocInterleaved(size_t cbBytes, int cThreads) {
  char *pBase;

#if defined(NUMA)
  WinNumaInit();
  if (fSystemIsNUMA && (cThreads > 1)) {
    char *pEnd;
    char *pch;
    HANDLE hThread;
    DWORD_PTR dwAffinityMask;
    ULONG ulNode;
    SYSTEM_INFO sSysInfo;
    size_t dwStep;
    int iThread;
    DWORD dwPageSize;   // the page size on this computer
    LPVOID lpvResult;

    GetSystemInfo(&sSysInfo);     // populate the system information structure
    dwPageSize = sSysInfo.dwPageSize;

    // Reserve pages in the process's virtual address space.
    pBase = (char*) VirtualAlloc(NULL, cbBytes, MEM_RESERVE, PAGE_NOACCESS);
    if (pBase == NULL) {
      printf ("VirtualAlloc() reserve failed\n");
      // NOTE(review): exits with status 0 although this is a failure path.
      exit(0);
    }

    // Now walk through memory, committing each page
    hThread = GetCurrentThread();
    dwStep = dwPageSize * cThreads;
    pEnd = pBase + cbBytes;
    for (iThread = 0; iThread < cThreads; iThread++) {
      ulNode = iThread % (ulNumaNodes+1);
      dwAffinityMask = SetThreadAffinityMask(hThread,
                                             (DWORD_PTR) ullProcessorMask[ulNode]);
      for (pch = pBase + iThread * dwPageSize;
           pch < pEnd;
           pch += dwStep) {
        lpvResult = VirtualAlloc(pch,             // next page to commit
                                 dwPageSize,      // page size, in bytes
                                 MEM_COMMIT,      // allocate a committed page
                                 PAGE_READWRITE); // read/write access
        if (lpvResult == NULL)
          ExitProcess(GetLastError());
        memset (lpvResult, 0, dwPageSize);
      }
      SetThreadAffinityMask(hThread, dwAffinityMask);
    }
    return (void *) pBase;
  }
#else
  (void) cThreads;   // only the NUMA path needs the thread count
#endif
  pBase = VirtualAlloc(NULL, cbBytes, MEM_COMMIT, PAGE_READWRITE);
  if (pBase == NULL)
    ExitProcess(GetLastError());
  memset (pBase, 0, cbBytes);
  return (void *) pBase;
}

// Free interleaved memory

// Release a region obtained from WinMallocInterleaved().
//
// pMemory - base address returned by WinMallocInterleaved()
// cBytes  - size of the region; kept for interface compatibility but unused
//
// VirtualFree requires a size of 0 with MEM_RELEASE and does not allow
// MEM_RELEASE to be combined with MEM_DECOMMIT. Releasing the region also
// decommits every page in it, so a separate decommit is not needed.
void WinFreeInterleaved(void *pMemory, size_t cBytes) {
  (void) cBytes;
  VirtualFree(pMemory,        // base address of the region
              0,              // must be 0 when releasing
              MEM_RELEASE);   // decommit and release the whole region
}

#endif

/*
********************************************************************************
*                                                                              *
*   Linux NUMA support                                                         *
*                                                                              *


====================================================================



This page took 0 seconds to execute

Last modified: Thu, 15 Apr 21 08:11:13 -0700

Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.