@@ -1052,8 +1052,51 @@ inline void add_dirty_threat(
10521052 dts->list .push_back ({pc, threatened, s, threatenedSq, PutPiece});
10531053}
10541054
1055+ #ifdef USE_AVX512ICL
1056+ // Writes one DirtyThreat per set bit of mask (at most 16, asserted below) into dts->list using a single vector store.
1057+ // Each entry is dt_template with the bit's square index OR-ed in at bit SqShift and the byte of p.piece_array() indexed by that square OR-ed in at bit PcShift.
1058+ template <int SqShift, int PcShift>
1059+ void write_multiple_dirties (const Position& p,
1060+ Bitboard mask,
1061+ DirtyThreat dt_template,
1062+ DirtyThreats* dts) {
1063+ static_assert (sizeof (DirtyThreat) == 4 ); // 16 four-byte entries fill one 512-bit vector exactly
1064+
1065+ const __m512i board = _mm512_loadu_si512 (p.piece_array ().data ()); // reads 64 bytes; assumes one byte per square - TODO confirm
1066+ const __m512i AllSquares = _mm512_set_epi8 (
1067+ 63 , 62 , 61 , 60 , 59 , 58 , 57 , 56 , 55 , 54 , 53 , 52 , 51 , 50 , 49 , 48 , 47 , 46 , 45 , 44 , 43 , 42 , 41 ,
1068+ 40 , 39 , 38 , 37 , 36 , 35 , 34 , 33 , 32 , 31 , 30 , 29 , 28 , 27 , 26 , 25 , 24 , 23 , 22 , 21 , 20 , 19 , 18 ,
1069+ 17 , 16 , 15 , 14 , 13 , 12 , 11 , 10 , 9 , 8 , 7 , 6 , 5 , 4 , 3 , 2 , 1 , 0 ); // arguments run from the highest lane down, so byte lane i holds the value i
1070+
1071+ const int dt_count = popcount (mask);
1072+ assert (dt_count <= 16 ); // only the low 16 compressed squares are widened and stored below
1073+
1074+ const __m512i template_v = _mm512_set1_epi32 (dt_template.raw ()); // raw template copied into all 16 32-bit lanes
1075+ auto * write = dts->list .make_space (dt_count); // presumably reserves dt_count entries and returns the first one - confirm against make_space
1076+
1077+ // Pack the square indices of mask's set bits into the low byte lanes (the remaining lanes are zeroed), then
1078+ // widen the low 16 bytes to 32-bit lanes. There are never more than 16 incoming threats so this is sufficient.
1079+ __m512i threat_squares = _mm512_maskz_compress_epi8 (mask, AllSquares);
1080+ threat_squares = _mm512_cvtepi8_epi32 (_mm512_castsi512_si128 (threat_squares));
1081+
1082+ __m512i threat_pieces =
1083+ _mm512_maskz_permutexvar_epi8 (0x1111111111111111ULL , threat_squares, board); // mask keeps byte 0 of each 32-bit lane = board byte at that lane's square; other bytes are zeroed
1084+
1085+ // Move the square and the piece to their bit positions inside the 32-bit entry
1086+ threat_squares = _mm512_slli_epi32 (threat_squares, SqShift);
1087+ threat_pieces = _mm512_slli_epi32 (threat_pieces, PcShift);
1088+
1089+ const __m512i dirties =
1090+ _mm512_ternarylogic_epi32 (template_v, threat_squares, threat_pieces, 254 /* A | B | C */ );
1091+ _mm512_storeu_si512 (reinterpret_cast <__m512i*>(write), dirties); // stores all 16 lanes (64 bytes) regardless of dt_count; NOTE(review): confirm the list always has room for the full write
1092+ }
1093+ #endif
1094+
10551095template <bool PutPiece, bool ComputeRay>
1056- void Position::update_piece_threats (Piece pc, Square s, DirtyThreats* const dts) {
1096+ void Position::update_piece_threats (Piece pc,
1097+ Square s,
1098+ DirtyThreats* const dts,
1099+ Bitboard noRaysContaining) {
10571100 const Bitboard occupied = pieces ();
10581101 const Bitboard rookQueens = pieces (ROOK, QUEEN);
10591102 const Bitboard bishopQueens = pieces (BISHOP, QUEEN);
@@ -1093,7 +1136,36 @@ void Position::update_piece_threats(Piece pc, Square s, DirtyThreats* const dts)
10931136 }
10941137
10951138 threatened &= occupied;
1139+ Bitboard sliders = (rookQueens & rAttacks) | (bishopQueens & bAttacks);
1140+ Bitboard incoming_threats =
1141+ (PseudoAttacks[KNIGHT][s] & knights) | (attacks_bb<PAWN>(s, WHITE) & blackPawns)
1142+ | (attacks_bb<PAWN>(s, BLACK) & whitePawns) | (PseudoAttacks[KING][s] & kings);
1143+
1144+ #ifdef USE_AVX512ICL
1145+ if (threatened)
1146+ {
1147+ if constexpr (PutPiece)
1148+ {
1149+ dts->threatenedSqs |= threatened;
1150+ dts->threateningSqs |= square_bb (s);
1151+ }
1152+
1153+ DirtyThreat dt_template{pc, NO_PIECE, s, Square (0 ), PutPiece};
1154+ write_multiple_dirties<DirtyThreat::ThreatenedSqOffset, DirtyThreat::ThreatenedPcOffset>(
1155+ *this , threatened, dt_template, dts);
1156+ }
1157+
1158+ Bitboard all_attackers = sliders | incoming_threats;
1159+ if (!all_attackers)
1160+ return ; // Square s is threatened iff there's at least one attacker
10961161
1162+ dts->threatenedSqs |= square_bb (s);
1163+ dts->threateningSqs |= all_attackers;
1164+
1165+ DirtyThreat dt_template{NO_PIECE, pc, Square (0 ), s, PutPiece};
1166+ write_multiple_dirties<DirtyThreat::PcSqOffset, DirtyThreat::PcOffset>(*this , all_attackers,
1167+ dt_template, dts);
1168+ #else
10971169 while (threatened)
10981170 {
10991171 Square threatenedSq = pop_lsb (threatened);
@@ -1104,8 +1176,7 @@ void Position::update_piece_threats(Piece pc, Square s, DirtyThreats* const dts)
11041176
11051177 add_dirty_threat<PutPiece>(dts, pc, threatenedPc, s, threatenedSq);
11061178 }
1107-
1108- Bitboard sliders = (rookQueens & rAttacks) | (bishopQueens & bAttacks);
1179+ #endif
11091180
11101181 if constexpr (ComputeRay)
11111182 {
@@ -1118,30 +1189,24 @@ void Position::update_piece_threats(Piece pc, Square s, DirtyThreats* const dts)
11181189 const Bitboard discovered = ray & qAttacks & occupied;
11191190
11201191 assert (!more_than_one (discovered));
1121- if (discovered)
1192+ if (discovered && (RayPassBB[sliderSq][s] & noRaysContaining) != noRaysContaining )
11221193 {
11231194 const Square threatenedSq = lsb (discovered);
11241195 const Piece threatenedPc = piece_on (threatenedSq);
11251196 add_dirty_threat<!PutPiece>(dts, slider, threatenedPc, sliderSq, threatenedSq);
11261197 }
11271198
1199+ #ifndef USE_AVX512ICL // for ICL, direct threats were processed earlier (all_attackers)
11281200 add_dirty_threat<PutPiece>(dts, slider, pc, sliderSq, s);
1201+ #endif
11291202 }
11301203 }
11311204 else
11321205 {
1133- while (sliders)
1134- {
1135- Square sliderSq = pop_lsb (sliders);
1136- Piece slider = piece_on (sliderSq);
1137- add_dirty_threat<PutPiece>(dts, slider, pc, sliderSq, s);
1138- }
1206+ incoming_threats |= sliders;
11391207 }
11401208
1141- Bitboard incoming_threats =
1142- (PseudoAttacks[KNIGHT][s] & knights) | (attacks_bb<PAWN>(s, WHITE) & blackPawns)
1143- | (attacks_bb<PAWN>(s, BLACK) & whitePawns) | (PseudoAttacks[KING][s] & kings);
1144-
1209+ #ifndef USE_AVX512ICL
11451210 while (incoming_threats)
11461211 {
11471212 Square srcSq = pop_lsb (incoming_threats);
@@ -1152,6 +1217,7 @@ void Position::update_piece_threats(Piece pc, Square s, DirtyThreats* const dts)
11521217
11531218 add_dirty_threat<PutPiece>(dts, srcPc, pc, srcSq, s);
11541219 }
1220+ #endif
11551221}
11561222
11571223// Helper used to do/undo a castling move. This is a bit
0 commit comments