Skip to content

Commit 1444837

Browse files
mstemberavondele
authored andcommitted
Remove inline assembly
closes #4698 No functional change
1 parent 3fe0d5c commit 1444837

File tree

1 file changed

+0
-118
lines changed

1 file changed

+0
-118
lines changed

src/nnue/layers/simd.h

Lines changed: 0 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -38,21 +38,6 @@
3838
# include <arm_neon.h>
3939
#endif
4040

41-
// The inline asm is only safe for GCC, where it is necessary to get good codegen.
42-
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101693
43-
// Clang does fine without it.
44-
// Play around here: https://godbolt.org/z/7EWqrYq51
45-
#if (defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER))
46-
#define USE_INLINE_ASM
47-
#endif
48-
49-
// Use either the AVX512 or AVX-VNNI version of the VNNI instructions.
50-
#if defined(USE_AVXVNNI)
51-
#define VNNI_PREFIX "%{vex%} "
52-
#else
53-
#define VNNI_PREFIX ""
54-
#endif
55-
5641
namespace Stockfish::Simd {
5742

5843
#if defined (USE_AVX512)
@@ -117,29 +102,11 @@ namespace Stockfish::Simd {
117102
__m512i b) {
118103

119104
# if defined (USE_VNNI)
120-
# if defined (USE_INLINE_ASM)
121-
asm(
122-
"vpdpbusd %[b], %[a], %[acc]\n\t"
123-
: [acc]"+v"(acc)
124-
: [a]"v"(a), [b]"vm"(b)
125-
);
126-
# else
127105
acc = _mm512_dpbusd_epi32(acc, a, b);
128-
# endif
129106
# else
130-
# if defined (USE_INLINE_ASM)
131-
__m512i tmp = _mm512_maddubs_epi16(a, b);
132-
asm(
133-
"vpmaddwd %[tmp], %[ones], %[tmp]\n\t"
134-
"vpaddd %[acc], %[tmp], %[acc]\n\t"
135-
: [acc]"+v"(acc), [tmp]"+&v"(tmp)
136-
: [ones]"v"(_mm512_set1_epi16(1))
137-
);
138-
# else
139107
__m512i product0 = _mm512_maddubs_epi16(a, b);
140108
product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1));
141109
acc = _mm512_add_epi32(acc, product0);
142-
# endif
143110
# endif
144111
}
145112

@@ -149,36 +116,14 @@ namespace Stockfish::Simd {
149116
__m512i a1, __m512i b1) {
150117

151118
# if defined (USE_VNNI)
152-
# if defined (USE_INLINE_ASM)
153-
asm(
154-
"vpdpbusd %[b0], %[a0], %[acc]\n\t"
155-
"vpdpbusd %[b1], %[a1], %[acc]\n\t"
156-
: [acc]"+&v"(acc)
157-
: [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1)
158-
);
159-
# else
160119
acc = _mm512_dpbusd_epi32(acc, a0, b0);
161120
acc = _mm512_dpbusd_epi32(acc, a1, b1);
162-
# endif
163121
# else
164-
# if defined (USE_INLINE_ASM)
165-
__m512i tmp0 = _mm512_maddubs_epi16(a0, b0);
166-
__m512i tmp1 = _mm512_maddubs_epi16(a1, b1);
167-
asm(
168-
"vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t"
169-
"vpmaddwd %[tmp1], %[ones], %[tmp1]\n\t"
170-
"vpaddd %[tmp0], %[tmp1], %[tmp0]\n\t"
171-
"vpaddd %[acc], %[tmp0], %[acc]\n\t"
172-
: [acc]"+v"(acc), [tmp0]"+&v"(tmp0), [tmp1]"+&v"(tmp1)
173-
: [ones]"v"(_mm512_set1_epi16(1))
174-
);
175-
# else
176122
__m512i product0 = _mm512_maddubs_epi16(a0, b0);
177123
__m512i product1 = _mm512_maddubs_epi16(a1, b1);
178124
product0 = _mm512_madd_epi16(product0, _mm512_set1_epi16(1));
179125
product1 = _mm512_madd_epi16(product1, _mm512_set1_epi16(1));
180126
acc = _mm512_add_epi32(acc, _mm512_add_epi32(product0, product1));
181-
# endif
182127
# endif
183128
}
184129

@@ -214,29 +159,11 @@ namespace Stockfish::Simd {
214159
__m256i b) {
215160

216161
# if defined (USE_VNNI)
217-
# if defined (USE_INLINE_ASM)
218-
asm(
219-
VNNI_PREFIX "vpdpbusd %[b], %[a], %[acc]\n\t"
220-
: [acc]"+v"(acc)
221-
: [a]"v"(a), [b]"vm"(b)
222-
);
223-
# else
224162
acc = _mm256_dpbusd_epi32(acc, a, b);
225-
# endif
226163
# else
227-
# if defined (USE_INLINE_ASM)
228-
__m256i tmp = _mm256_maddubs_epi16(a, b);
229-
asm(
230-
"vpmaddwd %[tmp], %[ones], %[tmp]\n\t"
231-
"vpaddd %[acc], %[tmp], %[acc]\n\t"
232-
: [acc]"+v"(acc), [tmp]"+&v"(tmp)
233-
: [ones]"v"(_mm256_set1_epi16(1))
234-
);
235-
# else
236164
__m256i product0 = _mm256_maddubs_epi16(a, b);
237165
product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1));
238166
acc = _mm256_add_epi32(acc, product0);
239-
# endif
240167
# endif
241168
}
242169

@@ -246,36 +173,14 @@ namespace Stockfish::Simd {
246173
__m256i a1, __m256i b1) {
247174

248175
# if defined (USE_VNNI)
249-
# if defined (USE_INLINE_ASM)
250-
asm(
251-
VNNI_PREFIX "vpdpbusd %[b0], %[a0], %[acc]\n\t"
252-
VNNI_PREFIX "vpdpbusd %[b1], %[a1], %[acc]\n\t"
253-
: [acc]"+&v"(acc)
254-
: [a0]"v"(a0), [b0]"vm"(b0), [a1]"v"(a1), [b1]"vm"(b1)
255-
);
256-
# else
257176
acc = _mm256_dpbusd_epi32(acc, a0, b0);
258177
acc = _mm256_dpbusd_epi32(acc, a1, b1);
259-
# endif
260178
# else
261-
# if defined (USE_INLINE_ASM)
262-
__m256i tmp0 = _mm256_maddubs_epi16(a0, b0);
263-
__m256i tmp1 = _mm256_maddubs_epi16(a1, b1);
264-
asm(
265-
"vpmaddwd %[tmp0], %[ones], %[tmp0]\n\t"
266-
"vpmaddwd %[tmp1], %[ones], %[tmp1]\n\t"
267-
"vpaddd %[tmp0], %[tmp1], %[tmp0]\n\t"
268-
"vpaddd %[acc], %[tmp0], %[acc]\n\t"
269-
: [acc]"+v"(acc), [tmp0]"+&v"(tmp0), [tmp1]"+&v"(tmp1)
270-
: [ones]"v"(_mm256_set1_epi16(1))
271-
);
272-
# else
273179
__m256i product0 = _mm256_maddubs_epi16(a0, b0);
274180
__m256i product1 = _mm256_maddubs_epi16(a1, b1);
275181
product0 = _mm256_madd_epi16(product0, _mm256_set1_epi16(1));
276182
product1 = _mm256_madd_epi16(product1, _mm256_set1_epi16(1));
277183
acc = _mm256_add_epi32(acc, _mm256_add_epi32(product0, product1));
278-
# endif
279184
# endif
280185
}
281186

@@ -304,44 +209,21 @@ namespace Stockfish::Simd {
304209
__m128i a,
305210
__m128i b) {
306211

307-
# if defined (USE_INLINE_ASM)
308-
__m128i tmp = _mm_maddubs_epi16(a, b);
309-
asm(
310-
"pmaddwd %[ones], %[tmp]\n\t"
311-
"paddd %[tmp], %[acc]\n\t"
312-
: [acc]"+v"(acc), [tmp]"+&v"(tmp)
313-
: [ones]"v"(_mm_set1_epi16(1))
314-
);
315-
# else
316212
__m128i product0 = _mm_maddubs_epi16(a, b);
317213
product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1));
318214
acc = _mm_add_epi32(acc, product0);
319-
# endif
320215
}
321216

322217
[[maybe_unused]] static void m128_add_dpbusd_epi32x2(
323218
__m128i& acc,
324219
__m128i a0, __m128i b0,
325220
__m128i a1, __m128i b1) {
326221

327-
# if defined (USE_INLINE_ASM)
328-
__m128i tmp0 = _mm_maddubs_epi16(a0, b0);
329-
__m128i tmp1 = _mm_maddubs_epi16(a1, b1);
330-
asm(
331-
"pmaddwd %[ones], %[tmp0]\n\t"
332-
"pmaddwd %[ones], %[tmp1]\n\t"
333-
"paddd %[tmp1], %[tmp0]\n\t"
334-
"paddd %[tmp0], %[acc]\n\t"
335-
: [acc]"+v"(acc), [tmp0]"+&v"(tmp0), [tmp1]"+&v"(tmp1)
336-
: [ones]"v"(_mm_set1_epi16(1))
337-
);
338-
# else
339222
__m128i product0 = _mm_maddubs_epi16(a0, b0);
340223
__m128i product1 = _mm_maddubs_epi16(a1, b1);
341224
product0 = _mm_madd_epi16(product0, _mm_set1_epi16(1));
342225
product1 = _mm_madd_epi16(product1, _mm_set1_epi16(1));
343226
acc = _mm_add_epi32(acc, _mm_add_epi32(product0, product1));
344-
# endif
345227
}
346228

347229
#endif

0 commit comments

Comments
 (0)