@@ -104,6 +104,28 @@ inline int32_t sumRegisterEpi32(avx_register_type_32& reg) {
104104#endif
105105}
106106
107+ template <bool V> // V picks the operation at compile time: true adds the weight row, false subtracts it
108+ inline void addWeightsToAccumulator (const int idx, int16_t * accumulator){ // idx: row of nn::inputWeights to apply; accumulator: int16_t buffer that is modified in place
109+ const auto wgt = (avx_register_type_16*) (nn::inputWeights[idx]); // view the selected weight row as an array of avx_register_type_16
110+ const auto sum = (avx_register_type_16*) (accumulator); // same view over the caller's buffer. NOTE(review): presumably both buffers must be aligned for avx_register_type_16 -- confirm
111+ if constexpr (V) { // V == true: accumulate (add) the weights
112+ for (int i = 0 ; i < HIDDEN_SIZE / STRIDE_16_BIT / 4 ; i++) { // four registers per iteration; integer division means any remainder is not processed -- assumes HIDDEN_SIZE is a multiple of 4 * STRIDE_16_BIT, TODO confirm
113+ sum[i * 4 + 0 ] = avx_add_epi16 (sum[i * 4 + 0 ], wgt[i * 4 + 0 ]);
114+ sum[i * 4 + 1 ] = avx_add_epi16 (sum[i * 4 + 1 ], wgt[i * 4 + 1 ]);
115+ sum[i * 4 + 2 ] = avx_add_epi16 (sum[i * 4 + 2 ], wgt[i * 4 + 2 ]);
116+ sum[i * 4 + 3 ] = avx_add_epi16 (sum[i * 4 + 3 ], wgt[i * 4 + 3 ]);
117+ }
118+ } else { // V == false: identical loop shape, but the weights are subtracted
119+ for (int i = 0 ; i < HIDDEN_SIZE / STRIDE_16_BIT / 4 ; i++) {
120+ sum[i * 4 + 0 ] = avx_sub_epi16 (sum[i * 4 + 0 ], wgt[i * 4 + 0 ]);
121+ sum[i * 4 + 1 ] = avx_sub_epi16 (sum[i * 4 + 1 ], wgt[i * 4 + 1 ]);
122+ sum[i * 4 + 2 ] = avx_sub_epi16 (sum[i * 4 + 2 ], wgt[i * 4 + 2 ]);
123+ sum[i * 4 + 3 ] = avx_sub_epi16 (sum[i * 4 + 3 ], wgt[i * 4 + 3 ]);
124+ }
125+ }
126+ }
127+
128+
107129void nn::init () {
108130 int memoryIndex = 0 ;
109131 std::memcpy (inputWeights, &gEvalData [memoryIndex], INPUT_SIZE * HIDDEN_SIZE * sizeof (int16_t ));
@@ -162,30 +184,6 @@ int nn::kingSquareIndex(bb::Square relativeKingSquare, bb::Color kingColor) {
162184 return indices[relativeKingSquare];
163185}
164186
165- void nn::AccumulatorTable::put (bb::Color view, Board* board, nn::Accumulator& accumulator) {
166- const bb::Square king_sq = bb::bitscanForward (board->getPieceBB (view, bb::KING));
167- const bool king_side = bb::fileIndex (king_sq) > 3 ;
168- const int ks_index = kingSquareIndex (king_sq, view);
169-
170- // use a different entry if the king crossed the half but it would technically
171- // still be within the same bucket
172- const int entry_idx = king_side * 16 + ks_index;
173-
174- // get the entry
175- AccumulatorTableEntry& entry = entries[view][entry_idx];
176-
177- // store the accumulator data
178- std::memcpy (entry.accumulator .summation [view], accumulator.summation [view],
179- sizeof (int16_t ) * HIDDEN_SIZE);
180-
181- // store the piece data
182- for (bb::Color c : {bb::WHITE, bb::BLACK}) {
183- for (bb::PieceType pt : {bb::PAWN, bb::KNIGHT, bb::BISHOP, bb::ROOK, bb::QUEEN, bb::KING}) {
184- bb::U64 bb = board->getPieceBB (c, pt);
185- entry.piece_occ [c][pt] = bb;
186- }
187- }
188- }
189187
190188void nn::AccumulatorTable::use (bb::Color view, Board* board, nn::Evaluator& evaluator) {
191189 const bb::Square king_sq = bb::bitscanForward (board->getPieceBB (view, bb::KING));
@@ -198,12 +196,9 @@ void nn::AccumulatorTable::use(bb::Color view, Board* board, nn::Evaluator& eval
198196
199197 // get the entry
200198 AccumulatorTableEntry& entry = entries[view][entry_idx];
201-
202- // first retrieve the accumulator from the table and put that into the evaluator
203- std::memcpy (evaluator.history .back ().summation [view], entry.accumulator .summation [view],
204- sizeof (int16_t ) * HIDDEN_SIZE);
205-
199+
206200 // go through each piece and compute the difference.
201+ // and update the accumulator table entry in place so that only a single memcpy is needed
207202 for (bb::Color c : {bb::WHITE, bb::BLACK}) {
208203 for (bb::PieceType pt : {bb::PAWN, bb::KNIGHT, bb::BISHOP, bb::ROOK, bb::QUEEN, bb::KING}) {
209204
@@ -219,20 +214,23 @@ void nn::AccumulatorTable::use(bb::Color view, Board* board, nn::Evaluator& eval
219214 // go through both sets and call the evaluator to update the accumulator
220215 while (to_set) {
221216 bb::Square sq = bb::bitscanForward (to_set);
222- evaluator. setPieceOnSquareAccumulator <true >(view, pt, c, sq, king_sq);
217+ addWeightsToAccumulator <true >(nn::index ( pt, c, sq, view, king_sq), entry. accumulator . summation [view] );
223218 to_set = bb::lsbReset (to_set);
224219 }
225220
226221 while (to_unset) {
227222 bb::Square sq = bb::bitscanForward (to_unset);
228- evaluator. setPieceOnSquareAccumulator <false >(view, pt, c, sq, king_sq);
223+ addWeightsToAccumulator <false >(nn::index ( pt, c, sq, view, king_sq), entry. accumulator . summation [view] );
229224 to_unset = bb::lsbReset (to_unset);
230225 }
226+
227+ // store the piece data
228+ entry.piece_occ [c][pt] = board_bb;
231229 }
232230 }
233- // this set has most likely been done on a reset. its handy to just put the new state
234- // into the table
235- put (view, board, evaluator. history . back ());
231+
232+ std::memcpy (evaluator. history . back (). summation [view], entry. accumulator . summation [view], sizeof ( int16_t ) * HIDDEN_SIZE);
233+
236234}
237235
238236void nn::AccumulatorTable::reset () {
@@ -258,25 +256,7 @@ void nn::Evaluator::setPieceOnSquareAccumulator(bb::Color side, bb::PieceType pi
258256 bb::Color pieceColor, bb::Square square,
259257 bb::Square kingSquare) {
260258 const int idx = index (pieceType, pieceColor, square, side, kingSquare);
261-
262- const auto wgt = (avx_register_type_16*) (inputWeights[idx]);
263- const auto sum = (avx_register_type_16*) (history.back ().summation [side]);
264- if constexpr (value) {
265- for (int i = 0 ; i < HIDDEN_SIZE / STRIDE_16_BIT / 4 ; i++) {
266- sum[i * 4 + 0 ] = avx_add_epi16 (sum[i * 4 + 0 ], wgt[i * 4 + 0 ]);
267- sum[i * 4 + 1 ] = avx_add_epi16 (sum[i * 4 + 1 ], wgt[i * 4 + 1 ]);
268- sum[i * 4 + 2 ] = avx_add_epi16 (sum[i * 4 + 2 ], wgt[i * 4 + 2 ]);
269- sum[i * 4 + 3 ] = avx_add_epi16 (sum[i * 4 + 3 ], wgt[i * 4 + 3 ]);
270- }
271- } else {
272- for (int i = 0 ; i < HIDDEN_SIZE / STRIDE_16_BIT / 4 ; i++) {
273- sum[i * 4 + 0 ] = avx_sub_epi16 (sum[i * 4 + 0 ], wgt[i * 4 + 0 ]);
274- sum[i * 4 + 1 ] = avx_sub_epi16 (sum[i * 4 + 1 ], wgt[i * 4 + 1 ]);
275- sum[i * 4 + 2 ] = avx_sub_epi16 (sum[i * 4 + 2 ], wgt[i * 4 + 2 ]);
276- sum[i * 4 + 3 ] = avx_sub_epi16 (sum[i * 4 + 3 ], wgt[i * 4 + 3 ]);
277- }
278- }
279-
259+ addWeightsToAccumulator<value>(idx, history.back ().summation [side]);
280260}
281261
282262void nn::Evaluator::reset (Board* board) {
0 commit comments