Author: Mike Byrne
Date: 18:48:58 07/27/04
Go up one level in this thread
Change the chess.h section for NT_i386 to look like this:
/* Build settings applied when the NT_i386 target is selected. */
#if defined(NT_i386)
# undef HAS_64BITS /* machine has 64-bit integers / operators */
# define HAS_LONGLONG /* machine has 32-bit/64-bit integers */
# undef UNIX /* system is unix-based */
/* Drop any earlier STDCALL definition and map it to __stdcall. */
# undef STDCALL
# define STDCALL __stdcall
/* Only when VC_INLINE_ASM is set: map CDECL to __cdecl and define
   USE_ASSEMBLY, the macro the *.c files are to test in place of
   INLINE_ASM. */
# ifdef VC_INLINE_ASM
# undef CDECL
# define CDECL __cdecl
# define USE_ASSEMBLY
# endif
#endif
Change the *.c files that contain "INLINE_ASM" (boolean.c and data.c) to
use "USE_ASSEMBLY" instead. Then, if you have problems compiling the
NUMA section in utility.c, just comment out the NUMA section -- this
works as long as you are careful and follow the brackets.
Here is that section, modified with the NUMA parts commented out:
==============================================================================
/*
*******************************************************************************
*
*
* Windows NUMA support
*
*
*
*******************************************************************************
*/
#if (defined(_WIN32) || defined(_WIN64)) && defined(SMP)
// Lock taken by WinNumaInit() around its one-time setup.
lock_t ThreadsLock;
// kernel32 entry points; WinNumaInit() resolves them at run time with
// GetProcAddress(), so each may be NULL if the lookup fails.
static BOOL(WINAPI * pGetNumaHighestNodeNumber) (PULONG);
static BOOL(WINAPI * pGetNumaNodeProcessorMask) (UCHAR, PULONGLONG);
static DWORD(WINAPI * pSetThreadIdealProcessor) (HANDLE, DWORD);
// Set to TRUE by WinNumaInit() the first time it runs its setup.
static volatile BOOL fThreadsInitialized = FALSE;
// TRUE once WinNumaInit() has found the NUMA queries, got a highest node
// number above zero, and seen a non-empty CPU mask for every node.
static BOOL fSystemIsNUMA = FALSE;
// CPU bit mask for each NUMA node, indexed by node number.
static ULONGLONG ullProcessorMask[256];
// Highest node number reported by Windows (node count minus one); capped
// at 255 by WinNumaInit() to stay inside ullProcessorMask[].
static ULONG ulNumaNodes;
// Node index advanced by the NUMA path of NumaStartThread() when that path
// is enabled.
static ULONG ulNumaNode = 0;
// Get NUMA-related information from Windows.
//
// The body runs at most once: fThreadsInitialized is tested, ThreadsLock is
// taken, and the flag is tested again before any work is done.  The three
// kernel32 entry points are looked up with GetProcAddress().  When both NUMA
// queries are found and Windows reports a highest node number above zero,
// ullProcessorMask[] is filled with one CPU mask per node and fSystemIsNUMA
// is set; it is cleared again if any node reports an empty mask.
#if defined (NUMA) //byrne
static void WinNumaInit(void)
{
  HMODULE hModule;
  ULONG ulCPU, ulNode;
  ULONGLONG ullMask;
  DWORD dwCPU;

  if (!fThreadsInitialized) {
    Lock(ThreadsLock);
    if (!fThreadsInitialized) {
      printf("\nInitializing multiple threads.\n");
      fThreadsInitialized = TRUE;
      hModule = GetModuleHandle("kernel32");
      pGetNumaHighestNodeNumber =
          (void *) GetProcAddress(hModule, "GetNumaHighestNodeNumber");
      pGetNumaNodeProcessorMask =
          (void *) GetProcAddress(hModule, "GetNumaNodeProcessorMask");
      pSetThreadIdealProcessor =
          (void *) GetProcAddress(hModule, "SetThreadIdealProcessor");
      if (pGetNumaHighestNodeNumber && pGetNumaNodeProcessorMask &&
          pGetNumaHighestNodeNumber(&ulNumaNodes) && (ulNumaNodes > 0)) {
        fSystemIsNUMA = TRUE;
        // ullProcessorMask[] has 256 entries, so node numbers stop at 255.
        if (ulNumaNodes > 255)
          ulNumaNodes = 255;
        printf("System is NUMA. %d nodes reported by Windows\n",
            ulNumaNodes + 1);
        for (ulNode = 0; ulNode <= ulNumaNodes; ulNode++) {
          pGetNumaNodeProcessorMask((UCHAR) ulNode,
              &ullProcessorMask[ulNode]);
          printf("Node %d CPUs: ", ulNode);
          ullMask = ullProcessorMask[ulNode];
          if (0 == ullMask)
            fSystemIsNUMA = FALSE;
          else {
            // Print the index of every set bit in the node's mask.
            ulCPU = 0;
            do {
              if (ullMask & 1)
                printf("%d ", ulCPU);
              ulCPU++;
              ullMask >>= 1;
            } while (ullMask);
          }
          printf("\n");
        }
        // Thread 0 was already started on some CPU. To simplify things
        // further, exchange ullProcessorMask[0] and ullProcessorMask[node
        // for that CPU], so ullProcessorMask[0] would always be node for
        // thread 0.
        // The lookup of SetThreadIdealProcessor may have failed; skip the
        // exchange rather than call through a NULL pointer.
        if (pSetThreadIdealProcessor) {
          dwCPU =
              pSetThreadIdealProcessor(GetCurrentThread(),
              MAXIMUM_PROCESSORS);
          printf("Current ideal CPU is %u\n", dwCPU);
          pSetThreadIdealProcessor(GetCurrentThread(), dwCPU);
          // dwCPU < 64 keeps the shift inside the 64-bit mask.
          if ((((DWORD) - 1) != dwCPU) && (MAXIMUM_PROCESSORS != dwCPU) &&
              (dwCPU < 64) &&
              !(ullProcessorMask[0] & ((ULONGLONG) 1 << dwCPU))) {
            for (ulNode = 1; ulNode <= ulNumaNodes; ulNode++) {
              if (ullProcessorMask[ulNode] & ((ULONGLONG) 1 << dwCPU)) {
                printf("Exchanging nodes 0 and %d\n", ulNode);
                ullMask = ullProcessorMask[ulNode];
                ullProcessorMask[ulNode] = ullProcessorMask[0];
                ullProcessorMask[0] = ullMask;
                break;
              }
            }
          }
        }
      } else
        printf("System is SMP, not NUMA.\n");
    }
    Unlock(ThreadsLock);
  }
}
#endif //byrne
// Start thread. For NUMA system set its affinity.
//
// func and args are handed to _beginthreadex() unchanged and the handle it
// returns is the result.  The NUMA placement path is compiled only when NUMA
// is defined, the same switch that guards WinNumaInit(); without it the
// thread is simply started with no affinity changes.
pthread_t NumaStartThread(void *func, void *args)
{
  HANDLE hThread;

#if defined(NUMA)
  ULONGLONG ullMask;

  WinNumaInit();
  if (fSystemIsNUMA) {
    // Hand out nodes in rotation, wrapping back to node 0.
    ulNumaNode++;
    if (ulNumaNode > ulNumaNodes)
      ulNumaNode = 0;
    ullMask = ullProcessorMask[ulNumaNode];
    printf("Starting thread on node %d CPU mask %I64d\n", ulNumaNode,
        ullMask);
    // Move the calling thread onto the target node while the new thread is
    // created suspended, pin the new thread, then let it run.
    SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR) ullMask);
    hThread = (HANDLE) _beginthreadex(0, 0, func, args,
        CREATE_SUSPENDED, 0);
    SetThreadAffinityMask(hThread, (DWORD_PTR) ullMask);
    ResumeThread(hThread);
    // Put the calling thread back on the node-0 mask.
    SetThreadAffinityMask(GetCurrentThread(),
        (DWORD_PTR) ullProcessorMask[0]);
    return hThread;
  }
#endif
  hThread = (HANDLE) _beginthreadex(0, 0, func, args, 0, 0);
  return hThread;
}
// Allocate memory for thread #N
//
// cbBytes  number of bytes wanted
// iThread  thread number; used only by the NUMA path to choose a node
//
// Without NUMA defined this is a plain malloc().  With NUMA defined (the
// switch that also guards WinNumaInit()) and a NUMA system detected, the
// block comes from VirtualAlloc() and is zero-filled while the calling
// thread is temporarily bound to the chosen node's CPU mask.  Returns NULL
// when the allocation fails.
void *WinMalloc(size_t cbBytes, int iThread)
{
#if defined(NUMA)
  WinNumaInit();
  if (fSystemIsNUMA) {
    HANDLE hThread = GetCurrentThread();
    ULONG ulNode = iThread % (ulNumaNodes + 1);
    DWORD_PTR dwAffinityMask = SetThreadAffinityMask(hThread,
        (DWORD_PTR) ullProcessorMask[ulNode]);
    void *pBytes = VirtualAlloc(NULL, cbBytes, MEM_COMMIT, PAGE_READWRITE);

    // Do not write through a failed allocation.
    if (pBytes != NULL)
      memset(pBytes, 0, cbBytes);
    // Restore the affinity mask the thread had on entry.
    SetThreadAffinityMask(hThread, dwAffinityMask);
    return pBytes;
  }
#endif
  (void) iThread;
  return malloc(cbBytes);
}
// Allocate interleaved memory
//
// cbBytes   number of bytes wanted
// cThreads  number of threads; used only by the NUMA path
//
// Without NUMA defined the block is committed in one VirtualAlloc() call and
// zero-filled.  With NUMA defined (the switch that also guards
// WinNumaInit()), a NUMA system detected and more than one thread, the range
// is reserved first and then committed and zero-filled one page at a time,
// with the calling thread bound to a different node's CPU mask for each
// stride of pages.  Any failure terminates the process.
void *WinMallocInterleaved(size_t cbBytes, int cThreads)
{
  char *pBase;

#if defined(NUMA)
  WinNumaInit();
  if (fSystemIsNUMA && (cThreads > 1)) {
    char *pEnd;
    char *pch;
    HANDLE hThread;
    DWORD_PTR dwAffinityMask;
    ULONG ulNode;
    SYSTEM_INFO sSysInfo;
    size_t dwStep;
    int iThread;
    DWORD dwPageSize;           // the page size on this computer
    LPVOID lpvResult;

    GetSystemInfo(&sSysInfo);   // populate the system information structure
    dwPageSize = sSysInfo.dwPageSize;
    // Reserve pages in the process's virtual address space.
    pBase = (char *) VirtualAlloc(NULL, cbBytes, MEM_RESERVE,
        PAGE_NOACCESS);
    if (pBase == NULL) {
      printf("VirtualAlloc() reserve failed\n");
      exit(0);
    }
    // Now walk through memory, committing each page
    hThread = GetCurrentThread();
    dwStep = dwPageSize * cThreads;
    pEnd = pBase + cbBytes;
    for (iThread = 0; iThread < cThreads; iThread++) {
      ulNode = iThread % (ulNumaNodes + 1);
      dwAffinityMask = SetThreadAffinityMask(hThread,
          (DWORD_PTR) ullProcessorMask[ulNode]);
      // Thread iThread takes page iThread, then every cThreads-th page.
      for (pch = pBase + iThread * dwPageSize; pch < pEnd; pch += dwStep) {
        lpvResult = VirtualAlloc(pch,   // next page to commit
            dwPageSize,                 // page size, in bytes
            MEM_COMMIT,                 // allocate a committed page
            PAGE_READWRITE);            // read/write access
        if (lpvResult == NULL)
          ExitProcess(GetLastError());
        memset(lpvResult, 0, dwPageSize);
      }
      // Restore the affinity mask the thread had before this stride.
      SetThreadAffinityMask(hThread, dwAffinityMask);
    }
    return (void *) pBase;
  }
#endif
  (void) cThreads;
  pBase = VirtualAlloc(NULL, cbBytes, MEM_COMMIT, PAGE_READWRITE);
  if (pBase == NULL)
    ExitProcess(GetLastError());
  memset(pBase, 0, cbBytes);
  return (void *) pBase;
}
// Free interleaved memory
//
// Releases the whole region that starts at pMemory.  cBytes is accepted for
// the caller's convenience but is not used: the release is requested with a
// size of 0.
void WinFreeInterleaved(void *pMemory, size_t cBytes)
{
  (void) cBytes;
  VirtualFree(pMemory, 0, MEM_RELEASE);
}
#endif
/*
*******************************************************************************
*
*
* Linux NUMA support
*
*
*
*******************************************************************************
*/
#if defined(LINUX) && defined(NUMA)
/*
*******************************************************************************
*
*
* First, discover if we are on a NUMA box. If not, the normal SMP
stuff is *
* primed and ready to go. If we are on a NUMA machine, we need to
know (a) *
* how many processors (nodes in the case of AMD/Intel) we have on
the *
* machine and (b) how many processors (threads) the user intends to
run. *
* It becomes important for the "smpmt=n" command to be either on
the *
* command-line or in the crafty.rc/.craftyrc files, otherwise we
might not *
* get things initialized optimally in a NUMA environment.
*
*
*
*******************************************************************************
*/
void NumaInit(void)
{
int numa_machine, maxNumaNodes;
if (!fThreadsInitialized) {
Lock(ThreadsLock);
if (!fThreadsInitialized) {
printf("\nInitializing multiple threads.\n");
fThreadsInitialized = TRUE;
numa_machine = numa_available();
if (numa_machine >= 0) {
maxNumaNodes = numa_max_node();
printf("System is NUMA. %d nodes reported by Linux\n",
maxNumaNodes + 1);
} else {
Print(4095, "system is not NUMA, skipping NUMA
initialization\n");
return;
}
pGetNumaHighestNodeNumber =
(void *) GetProcAddress(hModule,
"GetNumaHighestNodeNumber");
pGetNumaNodeProcessorMask =
(void *) GetProcAddress(hModule,
"GetNumaNodeProcessorMask");
pSetThreadIdealProcessor =
(void *) GetProcAddress(hModule, "SetThreadIdealProcessor");
if (pGetNumaHighestNodeNumber && pGetNumaNodeProcessorMask &&
pGetNumaHighestNodeNumber(&ulNumaNodes) && (ulNumaNodes >
0)) {
fSystemIsNUMA = TRUE;
if (ulNumaNodes > 255)
ulNumaNodes = 255;
printf("System is NUMA. %d nodes reported by Windows\n",
ulNumaNodes + 1);
for (ulNode = 0; ulNode <= ulNumaNodes; ulNode++) {
pGetNumaNodeProcessorMask((UCHAR) ulNode,
&ullProcessorMask[ulNode]);
printf("Node %d CPUs: ", ulNode);
ullMask = ullProcessorMask[ulNode];
if (0 == ullMask)
fSystemIsNUMA = FALSE;
else {
ulCPU = 0;
do {
if (ullMask & 1)
printf("%d ", ulCPU);
ulCPU++;
ullMask >>= 1;
} while (ullMask);
}
printf("\n");
}
// Thread 0 was already started on some CPU. To simplify things
further,
// exchange ullProcessorMask[0] and ullProcessorMask[node for that
CPU],
// so ullProcessorMask[0] would always be node for thread 0
dwCPU =
pSetThreadIdealProcessor(GetCurrentThread(),
MAXIMUM_PROCESSORS);
printf("Current ideal CPU is %u\n", dwCPU);
pSetThreadIdealProcessor(GetCurrentThread(), dwCPU);
if ((((DWORD) - 1) != dwCPU) && (MAXIMUM_PROCESSORS != dwCPU)
&&
!(ullProcessorMask[0] & (1u i64 << dwCPU))) {
for (ulNode = 1; ulNode <= ulNumaNodes; ulNode++) {
if (ullProcessorMask[ulNode] & (1u i64 << dwCPU)) {
printf("Exchanging nodes 0 and %d\n", ulNode);
ullMask = ullProcessorMask[ulNode];
ullProcessorMask[ulNode] = ullProcessorMask[0];
ullProcessorMask[0] = ullMask;
break;
}
}
}
} else
printf("System is SMP, not NUMA.\n");
}
Unlock(ThreadsLock);
}
}
// Start thread. For NUMA system set its affinity.
//
// func and args are handed to _beginthreadex() unchanged and the handle it
// returns is the result.  The per-node placement code is switched off
// (byrne); it is kept under "#if 0" for reference.
pthread_t NumaStartThread(void *func, void *args)
{
#if 0                           /* byrne: NUMA placement disabled */
  HANDLE hThread;
  ULONGLONG ullMask;

  WinNumaInit();
  if (fSystemIsNUMA) {
    ulNumaNode++;
    if (ulNumaNode > ulNumaNodes)
      ulNumaNode = 0;
    ullMask = ullProcessorMask[ulNumaNode];
    printf("Starting thread on node %d CPU mask %I64d\n", ulNumaNode,
        ullMask);
    SetThreadAffinityMask(GetCurrentThread(), (DWORD_PTR) ullMask);
    hThread = (HANDLE) _beginthreadex(0, 0, func, args,
        CREATE_SUSPENDED, 0);
    SetThreadAffinityMask(hThread, (DWORD_PTR) ullMask);
    ResumeThread(hThread);
    SetThreadAffinityMask(GetCurrentThread(), ullProcessorMask[0]);
    return hThread;
  }
#endif
  return (HANDLE) _beginthreadex(0, 0, func, args, 0, 0);
}
// Allocate memory for thread #N
//
// Returns malloc(cbBytes).  iThread is not used while the node-local
// allocation path is switched off (byrne); that path is kept under "#if 0"
// for reference.
void *WinMalloc(size_t cbBytes, int iThread)
{
#if 0                           /* byrne: NUMA-local allocation disabled */
  HANDLE hThread;
  DWORD_PTR dwAffinityMask;
  void *pBytes;
  ULONG ulNode;

  WinNumaInit();
  if (fSystemIsNUMA) {
    ulNode = iThread % (ulNumaNodes + 1);
    hThread = GetCurrentThread();
    dwAffinityMask = SetThreadAffinityMask(hThread,
        ullProcessorMask[ulNode]);
    pBytes = VirtualAlloc(NULL, cbBytes, MEM_COMMIT, PAGE_READWRITE);
    memset(pBytes, 0, cbBytes);
    SetThreadAffinityMask(hThread, dwAffinityMask);
    return pBytes;
  }
#endif
  (void) iThread;
  return malloc(cbBytes);
}
// Allocate interleaved memory
//
// cbBytes   number of bytes wanted
// cThreads  number of threads the block is to be spread across
//
// On a NUMA system with more than one thread the range is reserved first and
// then committed and zero-filled one page at a time, with the calling thread
// bound to a different node's CPU mask for each stride of pages.  Otherwise
// the block is committed in one VirtualAlloc() call and zero-filled.  Any
// failure terminates the process.
void *WinMallocInterleaved(size_t cbBytes, int cThreads)
{
  char *pBase;
  char *pEnd;
  char *pch;
  HANDLE hThread;
  DWORD_PTR dwAffinityMask;
  ULONG ulNode;
  SYSTEM_INFO sSysInfo;
  size_t dwStep;
  int iThread;
  DWORD dwPageSize;             // the page size on this computer
  LPVOID lpvResult;

  WinNumaInit();
  if (fSystemIsNUMA && (cThreads > 1)) {
    GetSystemInfo(&sSysInfo);   // populate the system information structure
    dwPageSize = sSysInfo.dwPageSize;
    // Reserve pages in the process's virtual address space.
    pBase = (char *) VirtualAlloc(NULL, cbBytes, MEM_RESERVE,
        PAGE_NOACCESS);
    if (pBase == NULL) {
      printf("VirtualAlloc() reserve failed\n");
      exit(0);
    }
    // Now walk through memory, committing each page
    hThread = GetCurrentThread();
    dwStep = dwPageSize * cThreads;
    pEnd = pBase + cbBytes;
    for (iThread = 0; iThread < cThreads; iThread++) {
      ulNode = iThread % (ulNumaNodes + 1);
      dwAffinityMask = SetThreadAffinityMask(hThread,
          ullProcessorMask[ulNode]);
      // Thread iThread takes page iThread, then every cThreads-th page.
      for (pch = pBase + iThread * dwPageSize; pch < pEnd; pch += dwStep) {
        lpvResult = VirtualAlloc(pch,   // next page to commit
            dwPageSize,                 // page size, in bytes
            MEM_COMMIT,                 // allocate a committed page
            PAGE_READWRITE);            // read/write access
        if (lpvResult == NULL)
          ExitProcess(GetLastError());
        memset(lpvResult, 0, dwPageSize);
      }
      // Restore the affinity mask the thread had before this stride.
      SetThreadAffinityMask(hThread, dwAffinityMask);
    }
  } else {
    pBase = VirtualAlloc(NULL, cbBytes, MEM_COMMIT, PAGE_READWRITE);
    if (pBase == NULL)
      ExitProcess(GetLastError());
    memset(pBase, 0, cbBytes);
  }
  return (void *) pBase;
}
// Free interleaved memory
//
// Releases the whole region that starts at pMemory.  VirtualFree() does not
// accept MEM_DECOMMIT combined with MEM_RELEASE, and requires a size of 0
// when releasing, so cBytes is not passed on.
void WinFreeInterleaved(void *pMemory, size_t cBytes)
{
  (void) cBytes;
  VirtualFree(pMemory, 0, MEM_RELEASE);
}
#endif
==========================================================================
This page took 0 seconds to execute
Last modified: Thu, 15 Apr 21 08:11:13 -0700
Current Computer Chess Club Forums at Talkchess. This site by Sean Mintz.