Skip to content

Commit 49ef4c9

Browse files
gab8192 Disservin
authored and committed
Implement accumulator refresh table
For each thread, persist an accumulator cache for the network, where each cache contains multiple entries for each of the possible king squares. When the accumulator needs to be refreshed, the cached entry is used to update the accumulator more efficiently, instead of rebuilding it from scratch. This idea was first described by Luecx (author of Koivisto) and is commonly referred to as "Finny Tables".

When the accumulator needs to be refreshed, instead of filling it with biases and adding every piece from scratch, we:
1. Take the `AccumulatorRefreshEntry` associated with the new king bucket
2. Calculate the features to activate and deactivate (from differences between bitboards in the entry and bitboards of the actual position)
3. Apply the updates on the refresh entry
4. Copy the content of the refresh entry accumulator to the accumulator we were refreshing
5. Copy the bitboards from the position to the refresh entry, to match the newly updated accumulator

Results at STC:
https://tests.stockfishchess.org/tests/view/662301573fe04ce4cefc1386 (first version)
https://tests.stockfishchess.org/tests/view/6627fa063fe04ce4cefc6560 (final)

Non-Regression between first and final:
https://tests.stockfishchess.org/tests/view/662801e33fe04ce4cefc660a

STC SMP:
https://tests.stockfishchess.org/tests/view/662808133fe04ce4cefc667c

closes #5183

No functional change
1 parent fcba524 commit 49ef4c9

File tree

12 files changed

+349
-80
lines changed

12 files changed

+349
-80
lines changed

src/evaluate.cpp

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,14 @@
2525
#include <iomanip>
2626
#include <iostream>
2727
#include <sstream>
28+
#include <memory>
2829

2930
#include "nnue/network.h"
3031
#include "nnue/nnue_misc.h"
3132
#include "position.h"
3233
#include "types.h"
3334
#include "uci.h"
35+
#include "nnue/nnue_accumulator.h"
3436

3537
namespace Stockfish {
3638

@@ -45,7 +47,10 @@ int Eval::simple_eval(const Position& pos, Color c) {
4547

4648
// Evaluate is the evaluator for the outer world. It returns a static evaluation
4749
// of the position from the point of view of the side to move.
48-
Value Eval::evaluate(const Eval::NNUE::Networks& networks, const Position& pos, int optimism) {
50+
Value Eval::evaluate(const Eval::NNUE::Networks& networks,
51+
const Position& pos,
52+
Eval::NNUE::AccumulatorCaches& caches,
53+
int optimism) {
4954

5055
assert(!pos.checkers());
5156

@@ -55,8 +60,8 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, const Position& pos,
5560
int nnueComplexity;
5661
int v;
5762

58-
Value nnue = smallNet ? networks.small.evaluate(pos, true, &nnueComplexity, psqtOnly)
59-
: networks.big.evaluate(pos, true, &nnueComplexity, false);
63+
Value nnue = smallNet ? networks.small.evaluate(pos, nullptr, true, &nnueComplexity, psqtOnly)
64+
: networks.big.evaluate(pos, &caches.big, true, &nnueComplexity, false);
6065

6166
const auto adjustEval = [&](int optDiv, int nnueDiv, int npmDiv, int pawnCountConstant,
6267
int pawnCountMul, int npmConstant, int evalDiv,
@@ -94,20 +99,22 @@ Value Eval::evaluate(const Eval::NNUE::Networks& networks, const Position& pos,
9499
// Trace scores are from white's point of view
95100
std::string Eval::trace(Position& pos, const Eval::NNUE::Networks& networks) {
96101

102+
auto caches = std::make_unique<Eval::NNUE::AccumulatorCaches>();
103+
97104
if (pos.checkers())
98105
return "Final evaluation: none (in check)";
99106

100107
std::stringstream ss;
101108
ss << std::showpoint << std::noshowpos << std::fixed << std::setprecision(2);
102-
ss << '\n' << NNUE::trace(pos, networks) << '\n';
109+
ss << '\n' << NNUE::trace(pos, networks, *caches) << '\n';
103110

104111
ss << std::showpoint << std::showpos << std::fixed << std::setprecision(2) << std::setw(15);
105112

106-
Value v = networks.big.evaluate(pos, false);
113+
Value v = networks.big.evaluate(pos, &caches->big, false);
107114
v = pos.side_to_move() == WHITE ? v : -v;
108115
ss << "NNUE evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)\n";
109116

110-
v = evaluate(networks, pos, VALUE_ZERO);
117+
v = evaluate(networks, pos, *caches, VALUE_ZERO);
111118
v = pos.side_to_move() == WHITE ? v : -v;
112119
ss << "Final evaluation " << 0.01 * UCIEngine::to_cp(v, pos) << " (white side)";
113120
ss << " [with scaled NNUE, ...]";

src/evaluate.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,16 @@ constexpr inline int SmallNetThreshold = 1274, PsqtOnlyThreshold = 2389;
4040

4141
namespace NNUE {
4242
struct Networks;
43+
struct AccumulatorCaches;
4344
}
4445

4546
std::string trace(Position& pos, const Eval::NNUE::Networks& networks);
4647

4748
int simple_eval(const Position& pos, Color c);
48-
Value evaluate(const NNUE::Networks& networks, const Position& pos, int optimism);
49-
50-
49+
Value evaluate(const NNUE::Networks& networks,
50+
const Position& pos,
51+
Eval::NNUE::AccumulatorCaches& caches,
52+
int optimism);
5153
} // namespace Eval
5254

5355
} // namespace Stockfish

src/nnue/features/half_ka_v2_hm.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
#include "../../bitboard.h"
2424
#include "../../position.h"
2525
#include "../../types.h"
26-
#include "../nnue_common.h"
26+
#include "../nnue_accumulator.h"
2727

2828
namespace Stockfish::Eval::NNUE::Features {
2929

@@ -49,6 +49,8 @@ void HalfKAv2_hm::append_active_indices(const Position& pos, IndexList& active)
4949
// Explicit template instantiations
5050
template void HalfKAv2_hm::append_active_indices<WHITE>(const Position& pos, IndexList& active);
5151
template void HalfKAv2_hm::append_active_indices<BLACK>(const Position& pos, IndexList& active);
52+
template IndexType HalfKAv2_hm::make_index<WHITE>(Square s, Piece pc, Square ksq);
53+
template IndexType HalfKAv2_hm::make_index<BLACK>(Square s, Piece pc, Square ksq);
5254

5355
// Get a list of indices for recently changed features
5456
template<Color Perspective>

src/nnue/features/half_ka_v2_hm.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,6 @@ class HalfKAv2_hm {
6363
{PS_NONE, PS_B_PAWN, PS_B_KNIGHT, PS_B_BISHOP, PS_B_ROOK, PS_B_QUEEN, PS_KING, PS_NONE,
6464
PS_NONE, PS_W_PAWN, PS_W_KNIGHT, PS_W_BISHOP, PS_W_ROOK, PS_W_QUEEN, PS_KING, PS_NONE}};
6565

66-
// Index of a feature for a given king position and another piece on some square
67-
template<Color Perspective>
68-
static IndexType make_index(Square s, Piece pc, Square ksq);
69-
7066
public:
7167
// Feature name
7268
static constexpr const char* Name = "HalfKAv2_hm(Friend)";
@@ -126,6 +122,10 @@ class HalfKAv2_hm {
126122
static constexpr IndexType MaxActiveDimensions = 32;
127123
using IndexList = ValueList<IndexType, MaxActiveDimensions>;
128124

125+
// Index of a feature for a given king position and another piece on some square
126+
template<Color Perspective>
127+
static IndexType make_index(Square s, Piece pc, Square ksq);
128+
129129
// Get a list of indices for active features
130130
template<Color Perspective>
131131
static void append_active_indices(const Position& pos, IndexList& active);

src/nnue/network.cpp

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -186,31 +186,33 @@ bool Network<Arch, Transformer>::save(const std::optional<std::string>& filename
186186

187187

188188
template<typename Arch, typename Transformer>
189-
Value Network<Arch, Transformer>::evaluate(const Position& pos,
190-
bool adjusted,
191-
int* complexity,
192-
bool psqtOnly) const {
189+
Value Network<Arch, Transformer>::evaluate(const Position& pos,
190+
AccumulatorCaches::Cache<FTDimensions>* cache,
191+
bool adjusted,
192+
int* complexity,
193+
bool psqtOnly) const {
193194
// We manually align the arrays on the stack because with gcc < 9.3
194195
// overaligning stack variables with alignas() doesn't work correctly.
195196

196197
constexpr uint64_t alignment = CacheLineSize;
197198
constexpr int delta = 24;
198199

199200
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
200-
TransformedFeatureType transformedFeaturesUnaligned
201-
[FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize
202-
+ alignment / sizeof(TransformedFeatureType)];
201+
TransformedFeatureType
202+
transformedFeaturesUnaligned[FeatureTransformer<FTDimensions, nullptr>::BufferSize
203+
+ alignment / sizeof(TransformedFeatureType)];
203204

204205
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
205206
#else
206-
alignas(alignment) TransformedFeatureType transformedFeatures
207-
[FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize];
207+
alignas(alignment) TransformedFeatureType
208+
transformedFeatures[FeatureTransformer<FTDimensions, nullptr>::BufferSize];
208209
#endif
209210

210211
ASSERT_ALIGNED(transformedFeatures, alignment);
211212

212213
const int bucket = (pos.count<ALL_PIECES>() - 1) / 4;
213-
const auto psqt = featureTransformer->transform(pos, transformedFeatures, bucket, psqtOnly);
214+
const auto psqt =
215+
featureTransformer->transform(pos, cache, transformedFeatures, bucket, psqtOnly);
214216
const auto positional = !psqtOnly ? (network[bucket]->propagate(transformedFeatures)) : 0;
215217

216218
if (complexity)
@@ -255,26 +257,29 @@ void Network<Arch, Transformer>::verify(std::string evalfilePath) const {
255257

256258

257259
template<typename Arch, typename Transformer>
258-
void Network<Arch, Transformer>::hint_common_access(const Position& pos, bool psqtOnl) const {
259-
featureTransformer->hint_common_access(pos, psqtOnl);
260+
void Network<Arch, Transformer>::hint_common_access(const Position& pos,
261+
AccumulatorCaches::Cache<FTDimensions>* cache,
262+
bool psqtOnl) const {
263+
featureTransformer->hint_common_access(pos, cache, psqtOnl);
260264
}
261265

262-
263266
template<typename Arch, typename Transformer>
264-
NnueEvalTrace Network<Arch, Transformer>::trace_evaluate(const Position& pos) const {
267+
NnueEvalTrace
268+
Network<Arch, Transformer>::trace_evaluate(const Position& pos,
269+
AccumulatorCaches::Cache<FTDimensions>* cache) const {
265270
// We manually align the arrays on the stack because with gcc < 9.3
266271
// overaligning stack variables with alignas() doesn't work correctly.
267272
constexpr uint64_t alignment = CacheLineSize;
268273

269274
#if defined(ALIGNAS_ON_STACK_VARIABLES_BROKEN)
270-
TransformedFeatureType transformedFeaturesUnaligned
271-
[FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize
272-
+ alignment / sizeof(TransformedFeatureType)];
275+
TransformedFeatureType
276+
transformedFeaturesUnaligned[FeatureTransformer<FTDimensions, nullptr>::BufferSize
277+
+ alignment / sizeof(TransformedFeatureType)];
273278

274279
auto* transformedFeatures = align_ptr_up<alignment>(&transformedFeaturesUnaligned[0]);
275280
#else
276-
alignas(alignment) TransformedFeatureType transformedFeatures
277-
[FeatureTransformer<Arch::TransformedFeatureDimensions, nullptr>::BufferSize];
281+
alignas(alignment) TransformedFeatureType
282+
transformedFeatures[FeatureTransformer<FTDimensions, nullptr>::BufferSize];
278283
#endif
279284

280285
ASSERT_ALIGNED(transformedFeatures, alignment);
@@ -284,7 +289,7 @@ NnueEvalTrace Network<Arch, Transformer>::trace_evaluate(const Position& pos) co
284289
for (IndexType bucket = 0; bucket < LayerStacks; ++bucket)
285290
{
286291
const auto materialist =
287-
featureTransformer->transform(pos, transformedFeatures, bucket, false);
292+
featureTransformer->transform(pos, cache, transformedFeatures, bucket, false);
288293
const auto positional = network[bucket]->propagate(transformedFeatures);
289294

290295
t.psqt[bucket] = static_cast<Value>(materialist / OutputScale);

src/nnue/network.h

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@
3131
#include "nnue_architecture.h"
3232
#include "nnue_feature_transformer.h"
3333
#include "nnue_misc.h"
34+
#include "nnue_accumulator.h"
3435

3536
namespace Stockfish::Eval::NNUE {
3637

37-
3838
enum class EmbeddedNNUEType {
3939
BIG,
4040
SMALL,
@@ -43,6 +43,8 @@ enum class EmbeddedNNUEType {
4343

4444
template<typename Arch, typename Transformer>
4545
class Network {
46+
static constexpr IndexType FTDimensions = Arch::TransformedFeatureDimensions;
47+
4648
public:
4749
Network(EvalFile file, EmbeddedNNUEType type) :
4850
evalFile(file),
@@ -51,17 +53,20 @@ class Network {
5153
void load(const std::string& rootDirectory, std::string evalfilePath);
5254
bool save(const std::optional<std::string>& filename) const;
5355

56+
Value evaluate(const Position& pos,
57+
AccumulatorCaches::Cache<FTDimensions>* cache,
58+
bool adjusted = false,
59+
int* complexity = nullptr,
60+
bool psqtOnly = false) const;
5461

55-
Value evaluate(const Position& pos,
56-
bool adjusted = false,
57-
int* complexity = nullptr,
58-
bool psqtOnly = false) const;
5962

60-
61-
void hint_common_access(const Position& pos, bool psqtOnl) const;
63+
void hint_common_access(const Position& pos,
64+
AccumulatorCaches::Cache<FTDimensions>* cache,
65+
bool psqtOnl) const;
6266

6367
void verify(std::string evalfilePath) const;
64-
NnueEvalTrace trace_evaluate(const Position& pos) const;
68+
NnueEvalTrace trace_evaluate(const Position& pos,
69+
AccumulatorCaches::Cache<FTDimensions>* cache) const;
6570

6671
private:
6772
void load_user_net(const std::string&, const std::string&);
@@ -89,6 +94,9 @@ class Network {
8994

9095
// Hash value of evaluation function structure
9196
static constexpr std::uint32_t hash = Transformer::get_hash_value() ^ Arch::get_hash_value();
97+
98+
template<IndexType Size>
99+
friend struct AccumulatorCaches::Cache;
92100
};
93101

94102
// Definitions of the network types

src/nnue/nnue_accumulator.h

Lines changed: 66 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,75 @@
2828

2929
namespace Stockfish::Eval::NNUE {
3030

31+
using BiasType = std::int16_t;
32+
using PSQTWeightType = std::int32_t;
33+
using IndexType = std::uint32_t;
34+
3135
// Class that holds the result of affine transformation of input features
3236
template<IndexType Size>
3337
struct alignas(CacheLineSize) Accumulator {
34-
std::int16_t accumulation[2][Size];
35-
std::int32_t psqtAccumulation[2][PSQTBuckets];
36-
bool computed[2];
37-
bool computedPSQT[2];
38+
std::int16_t accumulation[COLOR_NB][Size];
39+
std::int32_t psqtAccumulation[COLOR_NB][PSQTBuckets];
40+
bool computed[COLOR_NB];
41+
bool computedPSQT[COLOR_NB];
42+
};
43+
44+
45+
// AccumulatorCaches struct provides per-thread accumulator caches, where each
46+
// cache contains multiple entries for each of the possible king squares.
47+
// When the accumulator needs to be refreshed, the cached entry is used to more
48+
// efficiently update the accumulator, instead of rebuilding it from scratch.
49+
// This idea was first described by Luecx (author of Koivisto) and
50+
// is commonly referred to as "Finny Tables".
51+
// Container for the accumulator refresh caches. It currently holds a single
// cache, `big`, sized for TransformedFeatureDimensionsBig.
struct AccumulatorCaches {
52+
53+
// Refresh cache for an accumulator of width Size: one Entry per square
// (SQUARE_NB entries), looked up by Square through operator[].
template<IndexType Size>
54+
struct alignas(CacheLineSize) Cache {
55+
56+
// One cached accumulator state together with the bitboards it was built from.
struct alignas(CacheLineSize) Entry {
57+
// Accumulator values, one row of Size per color.
BiasType accumulation[COLOR_NB][Size];
58+
// PSQT accumulator values, one row of PSQTBuckets per color.
PSQTWeightType psqtAccumulation[COLOR_NB][PSQTBuckets];
59+
// Piece-placement bitboards matching the cached accumulation.
// NOTE(review): the first index is presumably the perspective - confirm
// against FeatureTransformer::update_accumulator_refresh.
Bitboard byColorBB[COLOR_NB][COLOR_NB];
60+
Bitboard byTypeBB[COLOR_NB][PIECE_TYPE_NB];
61+
62+
// Resets the entry to the state of an empty board: every bitboard is cleared,
63+
// so the accumulation holds only the biases, without any feature weights on top,
// and the PSQT accumulation is zero.
// `biases` must point to at least Size values; they are copied into both
// the WHITE and the BLACK accumulation.
void clear(const BiasType* biases) {
65+
66+
std::memset(byColorBB, 0, sizeof(byColorBB));
67+
std::memset(byTypeBB, 0, sizeof(byTypeBB));
68+
69+
std::memcpy(accumulation[WHITE], biases, Size * sizeof(BiasType));
70+
std::memcpy(accumulation[BLACK], biases, Size * sizeof(BiasType));
71+
72+
std::memset(psqtAccumulation, 0, sizeof(psqtAccumulation));
73+
}
74+
};
75+
76+
// Clears every entry using the biases read from network.featureTransformer.
template<typename Network>
77+
void clear(const Network& network) {
78+
for (auto& entry : entries)
79+
entry.clear(network.featureTransformer->biases);
80+
}
81+
82+
// Clears every entry using the given bias array (see Entry::clear).
void clear(const BiasType* biases) {
83+
for (auto& entry : entries)
84+
entry.clear(biases);
85+
}
86+
87+
// Returns the entry stored for square sq; no bounds check is performed.
Entry& operator[](Square sq) { return entries[sq]; }
88+
89+
std::array<Entry, SQUARE_NB> entries;
90+
};
91+
92+
// Clears all caches; currently only `big`, using networks.big.
template<typename Networks>
93+
void clear(const Networks& networks) {
94+
big.clear(networks.big);
95+
}
96+
97+
// When adding a new cache for a network, e.g. the smallnet,
98+
// the appropriate condition must be added to FeatureTransformer::update_accumulator_refresh.
99+
Cache<TransformedFeatureDimensionsBig> big;
38100
};
39101

40102
} // namespace Stockfish::Eval::NNUE

0 commit comments

Comments
 (0)