Skip to content

Commit a89906b

Browse files
committed
Merge branch 'remove_double_memcpy_acc_table'
bench: 4271540
2 parents c045d75 + a221241 commit a89906b

File tree

2 files changed

+34
-56
lines changed

2 files changed

+34
-56
lines changed

src_files/eval.cpp

Lines changed: 33 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,28 @@ inline int32_t sumRegisterEpi32(avx_register_type_32& reg) {
104104
#endif
105105
}
106106

107+
template<bool V>
108+
inline void addWeightsToAccumulator(const int idx, int16_t* accumulator){
109+
const auto wgt = (avx_register_type_16*) (nn::inputWeights[idx]);
110+
const auto sum = (avx_register_type_16*) (accumulator);
111+
if constexpr (V) {
112+
for (int i = 0; i < HIDDEN_SIZE / STRIDE_16_BIT / 4; i++) {
113+
sum[i * 4 + 0] = avx_add_epi16(sum[i * 4 + 0], wgt[i * 4 + 0]);
114+
sum[i * 4 + 1] = avx_add_epi16(sum[i * 4 + 1], wgt[i * 4 + 1]);
115+
sum[i * 4 + 2] = avx_add_epi16(sum[i * 4 + 2], wgt[i * 4 + 2]);
116+
sum[i * 4 + 3] = avx_add_epi16(sum[i * 4 + 3], wgt[i * 4 + 3]);
117+
}
118+
} else {
119+
for (int i = 0; i < HIDDEN_SIZE / STRIDE_16_BIT / 4; i++) {
120+
sum[i * 4 + 0] = avx_sub_epi16(sum[i * 4 + 0], wgt[i * 4 + 0]);
121+
sum[i * 4 + 1] = avx_sub_epi16(sum[i * 4 + 1], wgt[i * 4 + 1]);
122+
sum[i * 4 + 2] = avx_sub_epi16(sum[i * 4 + 2], wgt[i * 4 + 2]);
123+
sum[i * 4 + 3] = avx_sub_epi16(sum[i * 4 + 3], wgt[i * 4 + 3]);
124+
}
125+
}
126+
}
127+
128+
107129
void nn::init() {
108130
int memoryIndex = 0;
109131
std::memcpy(inputWeights, &gEvalData[memoryIndex], INPUT_SIZE * HIDDEN_SIZE * sizeof(int16_t));
@@ -162,30 +184,6 @@ int nn::kingSquareIndex(bb::Square relativeKingSquare, bb::Color kingColor) {
162184
return indices[relativeKingSquare];
163185
}
164186

165-
void nn::AccumulatorTable::put(bb::Color view, Board* board, nn::Accumulator& accumulator) {
166-
const bb::Square king_sq = bb::bitscanForward(board->getPieceBB(view, bb::KING));
167-
const bool king_side = bb::fileIndex(king_sq) > 3;
168-
const int ks_index = kingSquareIndex(king_sq, view);
169-
170-
// use a different entry if the king crossed the half but it would technically
171-
// still be within the same bucket
172-
const int entry_idx = king_side * 16 + ks_index;
173-
174-
// get the entry
175-
AccumulatorTableEntry& entry = entries[view][entry_idx];
176-
177-
// store the accumulator data
178-
std::memcpy(entry.accumulator.summation[view], accumulator.summation[view],
179-
sizeof(int16_t) * HIDDEN_SIZE);
180-
181-
// store the piece data
182-
for (bb::Color c : {bb::WHITE, bb::BLACK}) {
183-
for (bb::PieceType pt : {bb::PAWN, bb::KNIGHT, bb::BISHOP, bb::ROOK, bb::QUEEN, bb::KING}) {
184-
bb::U64 bb = board->getPieceBB(c, pt);
185-
entry.piece_occ[c][pt] = bb;
186-
}
187-
}
188-
}
189187

190188
void nn::AccumulatorTable::use(bb::Color view, Board* board, nn::Evaluator& evaluator) {
191189
const bb::Square king_sq = bb::bitscanForward(board->getPieceBB(view, bb::KING));
@@ -198,12 +196,9 @@ void nn::AccumulatorTable::use(bb::Color view, Board* board, nn::Evaluator& eval
198196

199197
// get the entry
200198
AccumulatorTableEntry& entry = entries[view][entry_idx];
201-
202-
// first retrieve the accumulator from the table and put that into the evaluator
203-
std::memcpy(evaluator.history.back().summation[view], entry.accumulator.summation[view],
204-
sizeof(int16_t) * HIDDEN_SIZE);
205-
199+
206200
// go through each piece and compute the difference.
201+
// and update the accumulator table entry in place so that only a single memcpy is needed
207202
for (bb::Color c : {bb::WHITE, bb::BLACK}) {
208203
for (bb::PieceType pt : {bb::PAWN, bb::KNIGHT, bb::BISHOP, bb::ROOK, bb::QUEEN, bb::KING}) {
209204

@@ -219,20 +214,23 @@ void nn::AccumulatorTable::use(bb::Color view, Board* board, nn::Evaluator& eval
219214
// go through both sets and call the evaluator to update the accumulator
220215
while (to_set) {
221216
bb::Square sq = bb::bitscanForward(to_set);
222-
evaluator.setPieceOnSquareAccumulator<true>(view, pt, c, sq, king_sq);
217+
addWeightsToAccumulator<true>(nn::index(pt, c, sq, view, king_sq), entry.accumulator.summation[view]);
223218
to_set = bb::lsbReset(to_set);
224219
}
225220

226221
while (to_unset) {
227222
bb::Square sq = bb::bitscanForward(to_unset);
228-
evaluator.setPieceOnSquareAccumulator<false>(view, pt, c, sq, king_sq);
223+
addWeightsToAccumulator<false>(nn::index(pt, c, sq, view, king_sq), entry.accumulator.summation[view]);
229224
to_unset = bb::lsbReset(to_unset);
230225
}
226+
227+
// store the piece data
228+
entry.piece_occ[c][pt] = board_bb;
231229
}
232230
}
233-
// this set has most likely been done on a reset. its handy to just put the new state
234-
// into the table
235-
put(view, board, evaluator.history.back());
231+
232+
std::memcpy(evaluator.history.back().summation[view], entry.accumulator.summation[view],sizeof(int16_t) * HIDDEN_SIZE);
233+
236234
}
237235

238236
void nn::AccumulatorTable::reset() {
@@ -258,25 +256,7 @@ void nn::Evaluator::setPieceOnSquareAccumulator(bb::Color side, bb::PieceType pi
258256
bb::Color pieceColor, bb::Square square,
259257
bb::Square kingSquare) {
260258
const int idx = index(pieceType, pieceColor, square, side, kingSquare);
261-
262-
const auto wgt = (avx_register_type_16*) (inputWeights[idx]);
263-
const auto sum = (avx_register_type_16*) (history.back().summation[side]);
264-
if constexpr (value) {
265-
for (int i = 0; i < HIDDEN_SIZE / STRIDE_16_BIT / 4; i++) {
266-
sum[i * 4 + 0] = avx_add_epi16(sum[i * 4 + 0], wgt[i * 4 + 0]);
267-
sum[i * 4 + 1] = avx_add_epi16(sum[i * 4 + 1], wgt[i * 4 + 1]);
268-
sum[i * 4 + 2] = avx_add_epi16(sum[i * 4 + 2], wgt[i * 4 + 2]);
269-
sum[i * 4 + 3] = avx_add_epi16(sum[i * 4 + 3], wgt[i * 4 + 3]);
270-
}
271-
} else {
272-
for (int i = 0; i < HIDDEN_SIZE / STRIDE_16_BIT / 4; i++) {
273-
sum[i * 4 + 0] = avx_sub_epi16(sum[i * 4 + 0], wgt[i * 4 + 0]);
274-
sum[i * 4 + 1] = avx_sub_epi16(sum[i * 4 + 1], wgt[i * 4 + 1]);
275-
sum[i * 4 + 2] = avx_sub_epi16(sum[i * 4 + 2], wgt[i * 4 + 2]);
276-
sum[i * 4 + 3] = avx_sub_epi16(sum[i * 4 + 3], wgt[i * 4 + 3]);
277-
}
278-
}
279-
259+
addWeightsToAccumulator<value>(idx, history.back().summation[side]);
280260
}
281261

282262
void nn::Evaluator::reset(Board* board) {

src_files/eval.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ void init();
6565
// also takes the view from which we look at the board as well as the king square of the viewing side
6666
[[nodiscard]] int index(bb::PieceType pieceType, bb::Color pieceColor, bb::Square square,
6767
bb::Color view, bb::Square kingSquare);
68+
6869
// the index is based on a king bucketing system. the relevant king bucket can be retrieved using
6970
// the function below
7071
[[nodiscard]] int kingSquareIndex(bb::Square kingSquare, bb::Color kingColor);
@@ -96,9 +97,6 @@ struct AccumulatorTableEntry {
9697
// used but is the fastest solution.
9798
struct AccumulatorTable {
9899
AccumulatorTableEntry entries[bb::N_COLORS][32] {};
99-
100-
// sets the specific accumulator to store the specified accumulator
101-
void put(bb::Color view, Board* board, Accumulator& accumulator);
102100

103101
void use(bb::Color view, Board* board, Evaluator& evaluator);
104102

0 commit comments

Comments
 (0)