Skip to content

Commit e608eab

Browse files
mstembera authored and vondele committed
Optimize update_accumulator_refresh_cache()
STC
https://tests.stockfishchess.org/tests/view/664105df26ac5f9b286d30e6
LLR: 2.94 (-2.94,2.94) <0.00,2.00>
Total: 178528 W: 46235 L: 45750 D: 86543
Ptnml(0-2): 505, 17792, 52142, 18363, 462

Combo of two yellow speedups
https://tests.stockfishchess.org/tests/view/6640abf9d163897c63214f5c
LLR: -2.93 (-2.94,2.94) <0.00,2.00>
Total: 355744 W: 91714 L: 91470 D: 172560
Ptnml(0-2): 913, 36233, 103384, 36381, 961

https://tests.stockfishchess.org/tests/view/6628ce073fe04ce4cefc739c
LLR: -2.93 (-2.94,2.94) <0.00,2.00>
Total: 627040 W: 162001 L: 161339 D: 303700
Ptnml(0-2): 2268, 72379, 163532, 73105, 2236

closes #5239

No functional change
1 parent 0b08953 commit e608eab

File tree

1 file changed

+16
-6
lines changed

1 file changed

+16
-6
lines changed

src/nnue/nnue_feature_transformer.h

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -664,7 +664,11 @@ class FeatureTransformer {
664664

665665
for (IndexType j = 0; j < HalfDimensions / TileHeight; ++j)
666666
{
667-
auto entryTile = reinterpret_cast<vec_t*>(&entry.accumulation[j * TileHeight]);
667+
auto accTile =
668+
reinterpret_cast<vec_t*>(&accumulator.accumulation[Perspective][j * TileHeight]);
669+
auto entryTile =
670+
reinterpret_cast<vec_t*>(&entry.accumulation[j * TileHeight]);
671+
668672
for (IndexType k = 0; k < NumRegs; ++k)
669673
acc[k] = entryTile[k];
670674

@@ -679,7 +683,7 @@ class FeatureTransformer {
679683
auto columnA = reinterpret_cast<const vec_t*>(&weights[offsetA]);
680684

681685
for (unsigned k = 0; k < NumRegs; ++k)
682-
acc[k] = vec_add_16(vec_sub_16(acc[k], columnR[k]), columnA[k]);
686+
acc[k] = vec_add_16(acc[k], vec_sub_16(columnA[k], columnR[k]));
683687
}
684688
for (; i < int(removed.size()); ++i)
685689
{
@@ -702,12 +706,17 @@ class FeatureTransformer {
702706

703707
for (IndexType k = 0; k < NumRegs; k++)
704708
vec_store(&entryTile[k], acc[k]);
709+
for (IndexType k = 0; k < NumRegs; k++)
710+
vec_store(&accTile[k], acc[k]);
705711
}
706712

707713
for (IndexType j = 0; j < PSQTBuckets / PsqtTileHeight; ++j)
708714
{
709-
auto entryTilePsqt =
710-
reinterpret_cast<psqt_vec_t*>(&entry.psqtAccumulation[j * PsqtTileHeight]);
715+
auto accTilePsqt = reinterpret_cast<psqt_vec_t*>(
716+
&accumulator.psqtAccumulation[Perspective][j * PsqtTileHeight]);
717+
auto entryTilePsqt = reinterpret_cast<psqt_vec_t*>(
718+
&entry.psqtAccumulation[j * PsqtTileHeight]);
719+
711720
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
712721
psqt[k] = entryTilePsqt[k];
713722

@@ -732,6 +741,8 @@ class FeatureTransformer {
732741

733742
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
734743
vec_store_psqt(&entryTilePsqt[k], psqt[k]);
744+
for (std::size_t k = 0; k < NumPsqtRegs; ++k)
745+
vec_store_psqt(&accTilePsqt[k], psqt[k]);
735746
}
736747

737748
#else
@@ -755,8 +766,6 @@ class FeatureTransformer {
755766
entry.psqtAccumulation[k] += psqtWeights[index * PSQTBuckets + k];
756767
}
757768

758-
#endif
759-
760769
// The accumulator of the refresh entry has been updated.
761770
// Now copy its content to the actual accumulator we were refreshing
762771

@@ -765,6 +774,7 @@ class FeatureTransformer {
765774

766775
std::memcpy(accumulator.psqtAccumulation[Perspective], entry.psqtAccumulation,
767776
sizeof(int32_t) * PSQTBuckets);
777+
#endif
768778

769779
for (Color c : {WHITE, BLACK})
770780
entry.byColorBB[c] = pos.pieces(c);

0 commit comments

Comments
 (0)