Skip to content

Commit f46c730

Browse files
mstemberavondele
authored and committed
Fix AVX512 build with older compilers
Avoids an intrinsic that is missing in gcc < 10. For this target, this might trigger another gcc bug on Windows that requires an up-to-date gcc 8, 9, or 10, or the use of clang. Fixes #2975. Closes #2976. No functional change.
1 parent 399cddf commit f46c730

File tree

2 files changed

+2
-3
lines changed

2 files changed

+2
-3
lines changed

src/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -416,7 +416,7 @@ endif
416416
ifeq ($(avx512),yes)
417417
CXXFLAGS += -DUSE_AVX512
418418
ifeq ($(comp),$(filter $(comp),gcc clang mingw))
419-
CXXFLAGS += -mavx512bw
419+
CXXFLAGS += -mavx512f -mavx512bw
420420
endif
421421
endif
422422

src/nnue/layers/affine_transform.h

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -126,8 +126,7 @@ namespace Eval::NNUE::Layers {
126126
const auto iv256 = reinterpret_cast<const __m256i*>(&input_vector[kNumChunks]);
127127
const auto row256 = reinterpret_cast<const __m256i*>(&row[kNumChunks]);
128128
__m256i product256 = _mm256_maddubs_epi16(_mm256_loadA_si256(&iv256[0]), _mm256_load_si256(&row256[0]));
129-
product256 = _mm256_madd_epi16(product256, _mm256_set1_epi16(1));
130-
sum = _mm512_add_epi32(sum, _mm512_zextsi256_si512(product256));
129+
sum = _mm512_add_epi32(sum, _mm512_cvtepi16_epi32(product256));
131130
}
132131
output[i] = _mm512_reduce_add_epi32(sum) + biases_[i];
133132

0 commit comments

Comments
 (0)