Skip to content

Commit abd835d

Browse files
anematode authored and vondele committed
Improve update_piece_threats
Passed on avx512ICL:
LLR: 2.95 (-2.94,2.94) <0.00,2.00> Total: 30240 W: 8026 L: 7726 D: 14488 Ptnml(0-2): 95, 3235, 8171, 3513, 106
https://tests.stockfishchess.org/tests/view/69281d9ab23dfeae38cfeeb8

Passed on generic architectures:
LLR: 2.94 (-2.94,2.94) <0.00,2.00> Total: 73184 W: 19183 L: 18821 D: 35180 Ptnml(0-2): 258, 7988, 19744, 8338, 264
https://tests.stockfishchess.org/tests/view/6928ba11b23dfeae38cff276

Subsequent cleanups tested as:
LLR: 2.94 (-2.94,2.94) <-1.75,0.25> Total: 72480 W: 18678 L: 18502 D: 35300 Ptnml(0-2): 242, 7925, 19718, 8125, 230
https://tests.stockfishchess.org/tests/view/692a26adb23dfeae38cff566

We add an argument noRaysContaining to update_piece_threats, which skips every discovery whose ray contains all bits in the argument; if the argument is from | to, then this will eliminate the discovery.

Separately, on AVX512ICL we can speed up the computation of DirtyThreats by moving from a pop_lsb loop to a vector extraction with vpcompressb. See PR for details.

closes #6453

No functional change
1 parent 9e2ee13 commit abd835d

File tree

4 files changed

+109
-24
lines changed

4 files changed

+109
-24
lines changed

src/misc.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,13 @@ class ValueList {
142142
const T* end() const { return values_ + size_; }
143143
const T& operator[](int index) const { return values_[index]; }
144144

145+
// Grows the list by `count` entries and returns a pointer to the first of the
// newly reserved slots. The slots themselves are not written here; the caller
// is expected to fill them through the returned pointer.
T* make_space(size_t count) {
146+
// Address of the current end of the list, taken before size_ is advanced.
T* result = &values_[size_];
147+
size_ += count;
148+
// The capacity check runs after the size has already been bumped and is
// only active in builds where assert() is enabled.
assert(size_ <= MaxSize);
149+
return result;
150+
}
151+
145152
private:
146153
T values_[MaxSize];
147154
std::size_t size_ = 0;

src/position.cpp

Lines changed: 80 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,8 +1052,51 @@ inline void add_dirty_threat(
10521052
dts->list.push_back({pc, threatened, s, threatenedSq, PutPiece});
10531053
}
10541054

1055+
#ifdef USE_AVX512ICL
1056+
// Given a DirtyThreat template and bit offsets to insert the piece type and square, write the threats
1057+
// present at the given bitboard.
// For every set bit in `mask`, one 32-bit entry is appended to dts->list. Each entry is
// dt_template's raw value OR-ed with (square index << SqShift) and (piece at that square << PcShift).
// Entries are produced in ascending square order.
1058+
template<int SqShift, int PcShift>
1059+
void write_multiple_dirties(const Position& p,
1060+
Bitboard mask,
1061+
DirtyThreat dt_template,
1062+
DirtyThreats* dts) {
1063+
// Each DirtyThreat has to occupy exactly one 32-bit vector lane for the store below.
static_assert(sizeof(DirtyThreat) == 4);
1064+
1065+
// Unaligned 64-byte load of the piece array; presumably one byte per square,
// indexed by square number (TODO confirm against Position::piece_array).
const __m512i board = _mm512_loadu_si512(p.piece_array().data());
1066+
// Identity table: byte i holds the value i (_mm512_set_epi8 takes its arguments
// from the highest byte down to the lowest).
const __m512i AllSquares = _mm512_set_epi8(
1067+
63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41,
1068+
40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
1069+
17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
1070+
1071+
// Number of entries to emit; only the lowest 16 compressed bytes survive the
// 8-bit -> 32-bit widening below, hence the bound.
const int dt_count = popcount(mask);
1072+
assert(dt_count <= 16);
1073+
1074+
// Broadcast the template's raw bits into all 16 lanes.
const __m512i template_v = _mm512_set1_epi32(dt_template.raw());
1075+
// Reserve dt_count slots at the end of the list; `write` points at the first one.
auto* write = dts->list.make_space(dt_count);
1076+
1077+
// Extract the list of squares and upconvert to 32 bits. There are never more than 16
1078+
// incoming threats so this is sufficient.
// The compress packs the square indices of the set mask bits into the lowest bytes
// and zeroes the remaining bytes.
1079+
__m512i threat_squares = _mm512_maskz_compress_epi8(mask, AllSquares);
1080+
threat_squares = _mm512_cvtepi8_epi32(_mm512_castsi512_si128(threat_squares));
1081+
1082+
// Per-lane lookup of board[square]. The mask 0x1111... enables only the lowest byte
// of each 32-bit lane, so the upper three bytes of every lane stay zero.
__m512i threat_pieces =
1083+
_mm512_maskz_permutexvar_epi8(0x1111111111111111ULL, threat_squares, board);
1084+
1085+
// Shift the piece and square into place
1086+
threat_squares = _mm512_slli_epi32(threat_squares, SqShift);
1087+
threat_pieces = _mm512_slli_epi32(threat_pieces, PcShift);
1088+
1089+
// Truth table 254 (0xFE) is the three-input OR, merging template, square and piece.
const __m512i dirties =
1090+
_mm512_ternarylogic_epi32(template_v, threat_squares, threat_pieces, 254 /* A | B | C */);
1091+
// NOTE(review): this always stores all 16 lanes (64 bytes), even when dt_count < 16,
// so memory past the dt_count reserved entries is overwritten with filler values.
// Presumably the list capacity leaves enough slack for that - confirm.
_mm512_storeu_si512(reinterpret_cast<__m512i*>(write), dirties);
1092+
}
1093+
#endif
1094+
10551095
template<bool PutPiece, bool ComputeRay>
1056-
void Position::update_piece_threats(Piece pc, Square s, DirtyThreats* const dts) {
1096+
void Position::update_piece_threats(Piece pc,
1097+
Square s,
1098+
DirtyThreats* const dts,
1099+
Bitboard noRaysContaining) {
10571100
const Bitboard occupied = pieces();
10581101
const Bitboard rookQueens = pieces(ROOK, QUEEN);
10591102
const Bitboard bishopQueens = pieces(BISHOP, QUEEN);
@@ -1093,7 +1136,36 @@ void Position::update_piece_threats(Piece pc, Square s, DirtyThreats* const dts)
10931136
}
10941137

10951138
threatened &= occupied;
1139+
Bitboard sliders = (rookQueens & rAttacks) | (bishopQueens & bAttacks);
1140+
Bitboard incoming_threats =
1141+
(PseudoAttacks[KNIGHT][s] & knights) | (attacks_bb<PAWN>(s, WHITE) & blackPawns)
1142+
| (attacks_bb<PAWN>(s, BLACK) & whitePawns) | (PseudoAttacks[KING][s] & kings);
1143+
1144+
#ifdef USE_AVX512ICL
1145+
if (threatened)
1146+
{
1147+
if constexpr (PutPiece)
1148+
{
1149+
dts->threatenedSqs |= threatened;
1150+
dts->threateningSqs |= square_bb(s);
1151+
}
1152+
1153+
DirtyThreat dt_template{pc, NO_PIECE, s, Square(0), PutPiece};
1154+
write_multiple_dirties<DirtyThreat::ThreatenedSqOffset, DirtyThreat::ThreatenedPcOffset>(
1155+
*this, threatened, dt_template, dts);
1156+
}
1157+
1158+
Bitboard all_attackers = sliders | incoming_threats;
1159+
if (!all_attackers)
1160+
return; // Square s is threatened iff there's at least one attacker
10961161

1162+
dts->threatenedSqs |= square_bb(s);
1163+
dts->threateningSqs |= all_attackers;
1164+
1165+
DirtyThreat dt_template{NO_PIECE, pc, Square(0), s, PutPiece};
1166+
write_multiple_dirties<DirtyThreat::PcSqOffset, DirtyThreat::PcOffset>(*this, all_attackers,
1167+
dt_template, dts);
1168+
#else
10971169
while (threatened)
10981170
{
10991171
Square threatenedSq = pop_lsb(threatened);
@@ -1104,8 +1176,7 @@ void Position::update_piece_threats(Piece pc, Square s, DirtyThreats* const dts)
11041176

11051177
add_dirty_threat<PutPiece>(dts, pc, threatenedPc, s, threatenedSq);
11061178
}
1107-
1108-
Bitboard sliders = (rookQueens & rAttacks) | (bishopQueens & bAttacks);
1179+
#endif
11091180

11101181
if constexpr (ComputeRay)
11111182
{
@@ -1118,30 +1189,24 @@ void Position::update_piece_threats(Piece pc, Square s, DirtyThreats* const dts)
11181189
const Bitboard discovered = ray & qAttacks & occupied;
11191190

11201191
assert(!more_than_one(discovered));
1121-
if (discovered)
1192+
if (discovered && (RayPassBB[sliderSq][s] & noRaysContaining) != noRaysContaining)
11221193
{
11231194
const Square threatenedSq = lsb(discovered);
11241195
const Piece threatenedPc = piece_on(threatenedSq);
11251196
add_dirty_threat<!PutPiece>(dts, slider, threatenedPc, sliderSq, threatenedSq);
11261197
}
11271198

1199+
#ifndef USE_AVX512ICL // for ICL, direct threats were processed earlier (all_attackers)
11281200
add_dirty_threat<PutPiece>(dts, slider, pc, sliderSq, s);
1201+
#endif
11291202
}
11301203
}
11311204
else
11321205
{
1133-
while (sliders)
1134-
{
1135-
Square sliderSq = pop_lsb(sliders);
1136-
Piece slider = piece_on(sliderSq);
1137-
add_dirty_threat<PutPiece>(dts, slider, pc, sliderSq, s);
1138-
}
1206+
incoming_threats |= sliders;
11391207
}
11401208

1141-
Bitboard incoming_threats =
1142-
(PseudoAttacks[KNIGHT][s] & knights) | (attacks_bb<PAWN>(s, WHITE) & blackPawns)
1143-
| (attacks_bb<PAWN>(s, BLACK) & whitePawns) | (PseudoAttacks[KING][s] & kings);
1144-
1209+
#ifndef USE_AVX512ICL
11451210
while (incoming_threats)
11461211
{
11471212
Square srcSq = pop_lsb(incoming_threats);
@@ -1152,6 +1217,7 @@ void Position::update_piece_threats(Piece pc, Square s, DirtyThreats* const dts)
11521217

11531218
add_dirty_threat<PutPiece>(dts, srcPc, pc, srcSq, s);
11541219
}
1220+
#endif
11551221
}
11561222

11571223
// Helper used to do/undo a castling move. This is a bit

src/position.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,10 @@ class Position {
187187

188188
// Other helpers
189189
template<bool PutPiece, bool ComputeRay = true>
190-
void update_piece_threats(Piece pc, Square s, DirtyThreats* const dts);
190+
void update_piece_threats(Piece pc,
191+
Square s,
192+
DirtyThreats* const dts,
193+
Bitboard noRaysContaining = -1ULL);
191194
void move_piece(Square from, Square to, DirtyThreats* const dts = nullptr);
192195
template<bool Do>
193196
void do_castling(Color us,
@@ -372,7 +375,7 @@ inline void Position::move_piece(Square from, Square to, DirtyThreats* const dts
372375
Bitboard fromTo = from | to;
373376

374377
if (dts)
375-
update_piece_threats<false>(pc, from, dts);
378+
update_piece_threats<false>(pc, from, dts, fromTo);
376379

377380
byTypeBB[ALL_PIECES] ^= fromTo;
378381
byTypeBB[type_of(pc)] ^= fromTo;
@@ -381,7 +384,7 @@ inline void Position::move_piece(Square from, Square to, DirtyThreats* const dts
381384
board[to] = pc;
382385

383386
if (dts)
384-
update_piece_threats<true>(pc, to, dts);
387+
update_piece_threats<true>(pc, to, dts, fromTo);
385388
}
386389

387390
inline void Position::swap_piece(Square s, Piece pc, DirtyThreats* const dts) {

src/types.h

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -293,22 +293,31 @@ struct DirtyPiece {
293293

294294
// Keep track of what threats change on the board (used by NNUE)
295295
struct DirtyThreat {
// One threat change packed into a single 32-bit word. Layout, as read back by the
// accessors below:
//   bits  0..7   square of the threatening piece (pc_sq)
//   bits  8..15  square of the threatened piece  (threatened_sq)
//   bits 16..19  threatened piece                (threatened_pc)
//   bits 20..23  threatening piece               (pc)
//   bit  31      add flag
// The constants below name the shift of each field.
296+
static constexpr int PcSqOffset = 0;
297+
static constexpr int ThreatenedSqOffset = 8;
298+
static constexpr int ThreatenedPcOffset = 16;
299+
static constexpr int PcOffset = 20;
300+
296301
DirtyThreat() { /* don't initialize data */ }
302+
// Wraps an already packed 32-bit value without any validation.
DirtyThreat(uint32_t raw) :
303+
data(raw) {}
297304
// Packs the individual fields into the layout described above.
DirtyThreat(Piece pc, Piece threatened_pc, Square pc_sq, Square threatened_sq, bool add) {
298-
data = (add << 31) | (pc << 20) | (threatened_pc << 16) | (threatened_sq << 8) | (pc_sq);
305+
// The add flag is converted to uint32_t first so that the shift into bit 31
// is carried out in unsigned arithmetic.
data = (uint32_t(add) << 31) | (pc << PcOffset) | (threatened_pc << ThreatenedPcOffset)
306+
| (threatened_sq << ThreatenedSqOffset) | (pc_sq << PcSqOffset);
299307
}
300308

301-
Piece pc() const { return static_cast<Piece>(data >> 20 & 0xf); }
302-
Piece threatened_pc() const { return static_cast<Piece>(data >> 16 & 0xf); }
303-
Square threatened_sq() const { return static_cast<Square>(data >> 8 & 0xff); }
304-
Square pc_sq() const { return static_cast<Square>(data & 0xff); }
305-
bool add() const { return data >> 31; }
309+
// Field accessors.
// NOTE(review): the shift amounts here are hard-coded literals that duplicate the
// *Offset constants above; the two must be kept in sync by hand.
Piece pc() const { return static_cast<Piece>(data >> 20 & 0xf); }
310+
Piece threatened_pc() const { return static_cast<Piece>(data >> 16 & 0xf); }
311+
Square threatened_sq() const { return static_cast<Square>(data >> 8 & 0xff); }
312+
Square pc_sq() const { return static_cast<Square>(data & 0xff); }
313+
bool add() const { return data >> 31; }
314+
// Returns the packed 32-bit representation unchanged.
uint32_t raw() const { return data; }
306315

307316
private:
308317
uint32_t data;
309318
};
310319

311-
using DirtyThreatList = ValueList<DirtyThreat, 80>;
320+
using DirtyThreatList = ValueList<DirtyThreat, 96>;
312321

313322
// A piece can be involved in at most 8 outgoing attacks and 16 incoming attacks.
314323
// Moving a piece also can reveal at most 8 discovered attacks.

0 commit comments

Comments
 (0)