Skip to content

Commit ee2999b

Browse files
committed
[PART OF] Add large page support for NNUE weights and simplify TT mem management
Use TT memory functions to allocate memory for the NNUE weights. This should provide a small speed-up on systems where large pages are not automatically used, including Windows and some Linux distributions.

Further, since we now have a wrapper for std::aligned_alloc(), we can simplify the TT memory management a bit:
- We no longer need to store separate pointers to the hash table and its underlying memory allocation.
- We also get to merge the Linux-specific and default implementations of aligned_ttmem_alloc().

Finally, we'll enable the VirtualAlloc code path with large page support also for Win32.

STC: https://tests.stockfishchess.org/tests/view/5f66595823a84a47b9036fba
LLR: 2.94 (-2.94,2.94) {-0.25,1.25}
Total: 14896 W: 1854 L: 1686 D: 11356
Ptnml(0-2): 65, 1224, 4742, 1312, 105

closes official-stockfish#3081

No functional change.
1 parent 6156cfe commit ee2999b

File tree

4 files changed

+32
-39
lines changed

4 files changed

+32
-39
lines changed

src/misc.cpp

Lines changed: 25 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -359,27 +359,11 @@ void std_aligned_free(void* ptr) {
359359
#endif
360360
}
361361

362-
/// aligned_ttmem_alloc() will return suitably aligned memory, if possible using large pages.
363-
/// The returned pointer is the aligned one, while the mem argument is the one that needs
364-
/// to be passed to free. With c++17 some of this functionality could be simplified.
362+
/// aligned_large_pages_alloc() will return suitably aligned memory, if possible using large pages.
365363

366-
#if defined(__linux__) && !defined(__ANDROID__)
367-
368-
void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
369-
370-
constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page sizes
371-
size_t size = ((allocSize + alignment - 1) / alignment) * alignment; // multiple of alignment
372-
if (posix_memalign(&mem, alignment, size))
373-
mem = nullptr;
374-
#if defined(MADV_HUGEPAGE)
375-
madvise(mem, allocSize, MADV_HUGEPAGE);
376-
#endif
377-
return mem;
378-
}
364+
#if defined(_WIN32)
379365

380-
#elif defined(_WIN64)
381-
382-
static void* aligned_ttmem_alloc_large_pages(size_t allocSize) {
366+
static void* aligned_large_pages_alloc_win(size_t allocSize) {
383367

384368
HANDLE hProcessToken { };
385369
LUID luid { };
@@ -424,12 +408,13 @@ static void* aligned_ttmem_alloc_large_pages(size_t allocSize) {
424408
return mem;
425409
}
426410

427-
void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
411+
void* aligned_large_pages_alloc(size_t allocSize) {
428412

429413
static bool firstCall = true;
414+
void* mem;
430415

431416
// Try to allocate large pages
432-
mem = aligned_ttmem_alloc_large_pages(allocSize);
417+
mem = aligned_large_pages_alloc_win(allocSize);
433418

434419
// Suppress info strings on the first call. The first call occurs before 'uci'
435420
// is received and in that case this output confuses some GUIs.
@@ -451,23 +436,31 @@ void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
451436

452437
#else
453438

454-
void* aligned_ttmem_alloc(size_t allocSize, void*& mem) {
439+
void* aligned_large_pages_alloc(size_t allocSize) {
440+
441+
#if defined(__linux__)
442+
constexpr size_t alignment = 2 * 1024 * 1024; // assumed 2MB page size
443+
#else
444+
constexpr size_t alignment = 4096; // assumed small page size
445+
#endif
455446

456-
constexpr size_t alignment = 64; // assumed cache line size
457-
size_t size = allocSize + alignment - 1; // allocate some extra space
458-
mem = malloc(size);
459-
void* ret = reinterpret_cast<void*>((uintptr_t(mem) + alignment - 1) & ~uintptr_t(alignment - 1));
460-
return ret;
447+
// round up to multiples of alignment
448+
size_t size = ((allocSize + alignment - 1) / alignment) * alignment;
449+
void *mem = std_aligned_alloc(alignment, size);
450+
#if defined(MADV_HUGEPAGE)
451+
madvise(mem, size, MADV_HUGEPAGE);
452+
#endif
453+
return mem;
461454
}
462455

463456
#endif
464457

465458

466-
/// aligned_ttmem_free() will free the previously allocated ttmem
459+
/// aligned_large_pages_free() will free the memory previously allocated by aligned_large_pages_alloc()
467460

468-
#if defined(_WIN64)
461+
#if defined(_WIN32)
469462

470-
void aligned_ttmem_free(void* mem) {
463+
void aligned_large_pages_free(void* mem) {
471464

472465
if (mem && !VirtualFree(mem, 0, MEM_RELEASE))
473466
{
@@ -480,8 +473,8 @@ void aligned_ttmem_free(void* mem) {
480473

481474
#else
482475

483-
void aligned_ttmem_free(void *mem) {
484-
free(mem);
476+
void aligned_large_pages_free(void *mem) {
477+
std_aligned_free(mem);
485478
}
486479

487480
#endif

src/misc.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ void prefetch(void* addr);
3535
void start_logger(const std::string& fname);
3636
void* std_aligned_alloc(size_t alignment, size_t size);
3737
void std_aligned_free(void* ptr);
38-
void* aligned_ttmem_alloc(size_t size, void*& mem);
39-
void aligned_ttmem_free(void* mem); // nop if mem == nullptr
38+
void* aligned_large_pages_alloc(size_t size); // memory aligned by page size, min alignment: 4096 bytes
39+
void aligned_large_pages_free(void* mem); // nop if mem == nullptr
4040

4141
void dbg_hit_on(bool b);
4242
void dbg_hit_on(bool c, bool b);

src/tt.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,12 @@ void TranspositionTable::resize(size_t mbSize) {
6464

6565
Threads.main()->wait_for_search_finished();
6666

67-
aligned_ttmem_free(mem);
67+
aligned_large_pages_free(table);
6868

6969
clusterCount = mbSize * 1024 * 1024 / sizeof(Cluster);
70-
table = static_cast<Cluster*>(aligned_ttmem_alloc(clusterCount * sizeof(Cluster), mem));
71-
if (!mem)
70+
71+
table = static_cast<Cluster*>(aligned_large_pages_alloc(clusterCount * sizeof(Cluster)));
72+
if (!table)
7273
{
7374
std::cerr << "Failed to allocate " << mbSize
7475
<< "MB for transposition table." << std::endl;

src/tt.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ class TranspositionTable {
7575
static_assert(sizeof(Cluster) == 32, "Unexpected Cluster size");
7676

7777
public:
78-
~TranspositionTable() { aligned_ttmem_free(mem); }
78+
~TranspositionTable() { aligned_large_pages_free(table); }
7979
void new_search() { generation8 += 8; } // Lower 3 bits are used by PV flag and Bound
8080
TTEntry* probe(const Key key, bool& found) const;
8181
int hashfull() const;
@@ -91,7 +91,6 @@ class TranspositionTable {
9191

9292
size_t clusterCount;
9393
Cluster* table;
94-
void* mem;
9594
uint8_t generation8; // Size must be not bigger than TTEntry::genBound8
9695
};
9796

0 commit comments

Comments
 (0)