@@ -63,19 +63,17 @@ namespace Stockfish::Eval::NNUE::Layers {
   {
 # if defined(USE_SSE2)
     // At least a multiple of 16, with SSE2.
-    static_assert(PaddedInputDimensions % 16 == 0);
-    constexpr IndexType NumChunks = PaddedInputDimensions / 16;
+    constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
     const __m128i Zeros = _mm_setzero_si128();
     const auto inputVector = reinterpret_cast<const __m128i*>(input);

 # elif defined(USE_MMX)
-    static_assert(InputDimensions % 8 == 0);
-    constexpr IndexType NumChunks = InputDimensions / 8;
+    constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 8) / 8;
     const __m64 Zeros = _mm_setzero_si64();
     const auto inputVector = reinterpret_cast<const __m64*>(input);

 # elif defined(USE_NEON)
-    constexpr IndexType NumChunks = (InputDimensions + 15) / 16;
+    constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 16) / 16;
     const auto inputVector = reinterpret_cast<const int8x8_t*>(input);
 # endif

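Note: the chunk counts above are now derived with ceil_to_multiple instead of asserting exact divisibility, so a padded input tail is simply swept into the last chunk. A minimal sketch of the rounding helper, assuming the usual (n + base - 1) / base * base definition from nnue_common.h:

#include <cstdint>

using IndexType = std::uint32_t;

// Round n up to the next multiple of base (integer arithmetic only).
template <typename IntType>
constexpr IntType ceil_to_multiple(IntType n, IntType base) {
    return (n + base - 1) / base * base;
}

// The NEON chunk count is unchanged in value:
static_assert(ceil_to_multiple<IndexType>(37, 16) / 16 == (37 + 15) / 16);
// The SSE2/MMX paths now count chunks over the rounded-up width instead of
// requiring the dimensions to divide evenly (hence the removed static_asserts).
static_assert(ceil_to_multiple<IndexType>(37, 16) / 16 == 3);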
@@ -150,24 +148,27 @@ namespace Stockfish::Eval::NNUE::Layers {
   }
 #endif

-  template <typename PreviousLayer, IndexType OutDims, typename Enabled = void>
+  template <IndexType InDims, IndexType OutDims, typename Enabled = void>
   class AffineTransform;

   // A specialization for large inputs.
-  template <typename PreviousLayer, IndexType OutDims>
-  class AffineTransform<PreviousLayer, OutDims, std::enable_if_t<(PreviousLayer::OutputDimensions >= 2*64 - 1)>> {
+  template <IndexType InDims, IndexType OutDims>
+  class AffineTransform<InDims, OutDims, std::enable_if_t<(ceil_to_multiple<IndexType>(InDims, MaxSimdWidth) >= 2*64)>> {
    public:
     // Input/output type
-    using InputType = typename PreviousLayer::OutputType;
+    using InputType = std::uint8_t;
     using OutputType = std::int32_t;
-    static_assert(std::is_same<InputType, std::uint8_t>::value, "");

     // Number of input/output dimensions
-    static constexpr IndexType InputDimensions = PreviousLayer::OutputDimensions;
+    static constexpr IndexType InputDimensions = InDims;
     static constexpr IndexType OutputDimensions = OutDims;

     static constexpr IndexType PaddedInputDimensions =
       ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
+    static constexpr IndexType PaddedOutputDimensions =
+      ceil_to_multiple<IndexType>(OutputDimensions, MaxSimdWidth);
+
+    using OutputBuffer = OutputType[PaddedOutputDimensions];

     static_assert(PaddedInputDimensions >= 128, "Something went wrong. This specialization should not have been chosen.");

@@ -202,20 +203,12 @@

     static_assert(OutputDimensions % NumOutputRegs == 0);

-    // Size of forward propagation buffer used in this layer
-    static constexpr std::size_t SelfBufferSize =
-      ceil_to_multiple(OutputDimensions * sizeof(OutputType), CacheLineSize);
-
-    // Size of the forward propagation buffer used from the input layer to this layer
-    static constexpr std::size_t BufferSize =
-      PreviousLayer::BufferSize + SelfBufferSize;
-
     // Hash value embedded in the evaluation file
-    static constexpr std::uint32_t get_hash_value() {
+    static constexpr std::uint32_t get_hash_value(std::uint32_t prevhash) {
       std::uint32_t hashValue = 0xCC03DAE4u;
       hashValue += OutputDimensions;
-      hashValue ^= PreviousLayer::get_hash_value() >> 1;
-      hashValue ^= PreviousLayer::get_hash_value() << 31;
+      hashValue ^= prevhash >> 1;
+      hashValue ^= prevhash << 31;
       return hashValue;
     }

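Note: with get_hash_value() now taking the previous layer's hash as a parameter instead of recursing through PreviousLayer::get_hash_value(), the chaining has to happen wherever the full network is described. A hypothetical caller-side sketch (the dimensions and function name are illustrative only, and activation layers are omitted):

// Hypothetical network-level hash: fold each layer's hash over the previous
// one, front to back, mirroring what the removed recursion did implicitly.
static constexpr std::uint32_t get_network_hash_value(std::uint32_t inputHash) {
    std::uint32_t h = inputHash;                       // e.g. feature-transformer hash
    h = AffineTransform<1024, 16>::get_hash_value(h);  // first affine layer
    h = AffineTransform<32, 32>::get_hash_value(h);    // second affine layer
    h = AffineTransform<32, 1>::get_hash_value(h);     // output layer
    return h;
}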
@@ -242,7 +235,6 @@

     // Read network parameters
     bool read_parameters(std::istream& stream) {
-      if (!previousLayer.read_parameters(stream)) return false;
       for (std::size_t i = 0; i < OutputDimensions; ++i)
         biases[i] = read_little_endian<BiasType>(stream);

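Note: dropping the previousLayer.read_parameters() forwarding keeps the on-disk format only if the owner of the layers now streams them front to back itself. A hypothetical owner-side sketch (member names are illustrative):

// Hypothetical network container: read each layer's parameters in the
// same front-to-back order the removed PreviousLayer recursion produced.
bool read_network_parameters(std::istream& stream) {
    return layer1.read_parameters(stream)
        && layer2.read_parameters(stream)
        && outputLayer.read_parameters(stream);
}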
@@ -254,7 +246,6 @@

     // Write network parameters
     bool write_parameters(std::ostream& stream) const {
-      if (!previousLayer.write_parameters(stream)) return false;
       for (std::size_t i = 0; i < OutputDimensions; ++i)
         write_little_endian<BiasType>(stream, biases[i]);

@@ -266,10 +257,7 @@

     // Forward propagation
     const OutputType* propagate(
-      const TransformedFeatureType* transformedFeatures, char* buffer) const {
-      const auto input = previousLayer.propagate(
-        transformedFeatures, buffer + SelfBufferSize);
-      OutputType* output = reinterpret_cast<OutputType*>(buffer);
+      const InputType* input, OutputType* output) const {

 #if defined (USE_AVX512)
       using acc_vec_t = __m512i;
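Note: propagate() no longer recurses into previousLayer or carves its output out of a shared char buffer; the caller passes the input and an aligned output array, which is what the new PaddedOutputDimensions / OutputBuffer typedef supports. A hypothetical caller-side sketch (the wrapper struct and the 1024x16 dimensions are illustrative, not the actual Stockfish network code):

// Hypothetical single-layer use with a caller-owned output buffer.
// OutputBuffer is OutputType[PaddedOutputDimensions], so full-width vector
// stores that run past OutputDimensions still land inside the array.
using Layer = AffineTransform<1024, 16>;

struct LayerBuffer {
    alignas(CacheLineSize) Layer::OutputBuffer out;   // std::int32_t storage
};

const std::int32_t* run_layer(const Layer& layer, const std::uint8_t* input,
                              LayerBuffer& buf) {
    return layer.propagate(input, buf.out);           // returns buf.out
}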
@@ -312,7 +300,6 @@
 #if defined (USE_SSSE3) || defined (USE_NEON)
       const in_vec_t* invec = reinterpret_cast<const in_vec_t*>(input);

-
       // Perform accumulation to registers for each big block
       for (IndexType bigBlock = 0; bigBlock < NumBigBlocks; ++bigBlock)
       {
@@ -377,26 +364,28 @@
     using BiasType = OutputType;
     using WeightType = std::int8_t;

-    PreviousLayer previousLayer;
-
     alignas(CacheLineSize) BiasType biases[OutputDimensions];
     alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
   };

-  template <typename PreviousLayer, IndexType OutDims>
-  class AffineTransform<PreviousLayer, OutDims, std::enable_if_t<(PreviousLayer::OutputDimensions < 2*64 - 1)>> {
+  template <IndexType InDims, IndexType OutDims>
+  class AffineTransform<InDims, OutDims, std::enable_if_t<(ceil_to_multiple<IndexType>(InDims, MaxSimdWidth) < 2*64)>> {
    public:
     // Input/output type
-    using InputType = typename PreviousLayer::OutputType;
+    // Input/output type
+    using InputType = std::uint8_t;
     using OutputType = std::int32_t;
-    static_assert(std::is_same<InputType, std::uint8_t>::value, "");

     // Number of input/output dimensions
-    static constexpr IndexType InputDimensions =
-      PreviousLayer::OutputDimensions;
+    static constexpr IndexType InputDimensions = InDims;
     static constexpr IndexType OutputDimensions = OutDims;
+
     static constexpr IndexType PaddedInputDimensions =
-      ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
+      ceil_to_multiple<IndexType>(InputDimensions, MaxSimdWidth);
+    static constexpr IndexType PaddedOutputDimensions =
+      ceil_to_multiple<IndexType>(OutputDimensions, MaxSimdWidth);
+
+    using OutputBuffer = OutputType[PaddedOutputDimensions];

     static_assert(PaddedInputDimensions < 128, "Something went wrong. This specialization should not have been chosen.");

@@ -405,20 +394,12 @@
     static constexpr const IndexType InputSimdWidth = SimdWidth;
 #endif

-    // Size of forward propagation buffer used in this layer
-    static constexpr std::size_t SelfBufferSize =
-      ceil_to_multiple(OutputDimensions * sizeof(OutputType), CacheLineSize);
-
-    // Size of the forward propagation buffer used from the input layer to this layer
-    static constexpr std::size_t BufferSize =
-      PreviousLayer::BufferSize + SelfBufferSize;
-
     // Hash value embedded in the evaluation file
-    static constexpr std::uint32_t get_hash_value() {
+    static constexpr std::uint32_t get_hash_value(std::uint32_t prevhash) {
       std::uint32_t hashValue = 0xCC03DAE4u;
       hashValue += OutputDimensions;
-      hashValue ^= PreviousLayer::get_hash_value() >> 1;
-      hashValue ^= PreviousLayer::get_hash_value() << 31;
+      hashValue ^= prevhash >> 1;
+      hashValue ^= prevhash << 31;
       return hashValue;
     }

@@ -441,7 +422,6 @@

     // Read network parameters
     bool read_parameters(std::istream& stream) {
-      if (!previousLayer.read_parameters(stream)) return false;
       for (std::size_t i = 0; i < OutputDimensions; ++i)
         biases[i] = read_little_endian<BiasType>(stream);
       for (std::size_t i = 0; i < OutputDimensions * PaddedInputDimensions; ++i)
@@ -452,7 +432,6 @@

     // Write network parameters
     bool write_parameters(std::ostream& stream) const {
-      if (!previousLayer.write_parameters(stream)) return false;
       for (std::size_t i = 0; i < OutputDimensions; ++i)
         write_little_endian<BiasType>(stream, biases[i]);

@@ -463,10 +442,7 @@
     }
     // Forward propagation
     const OutputType* propagate(
-      const TransformedFeatureType* transformedFeatures, char* buffer) const {
-      const auto input = previousLayer.propagate(
-        transformedFeatures, buffer + SelfBufferSize);
-      const auto output = reinterpret_cast<OutputType*>(buffer);
+      const InputType* input, OutputType* output) const {

 #if defined (USE_AVX2)
       using vec_t = __m256i;
@@ -491,12 +467,11 @@
 #if defined (USE_SSSE3)
       const auto inputVector = reinterpret_cast<const vec_t*>(input);

-      static_assert(InputDimensions % 8 == 0);
       static_assert(OutputDimensions % OutputSimdWidth == 0 || OutputDimensions == 1);

       if constexpr (OutputDimensions % OutputSimdWidth == 0)
       {
-        constexpr IndexType NumChunks = InputDimensions / 4;
+        constexpr IndexType NumChunks = ceil_to_multiple<IndexType>(InputDimensions, 8) / 4;
         constexpr IndexType NumRegs = OutputDimensions / OutputSimdWidth;

         const auto input32 = reinterpret_cast<const std::int32_t*>(input);
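Note: the dropped static_assert(InputDimensions % 8 == 0) goes hand in hand with the new chunk count: the int32-wise reads now cover the zero-padded tail rather than requiring the dimension to divide evenly. A quick arithmetic check for a hypothetical InputDimensions of 30:

// old:  30 / 4                      == 7            (tail bytes would be dropped)
// new:  ceil_to_multiple(30, 8) / 4 == 32 / 4 == 8  (covers the padded tail)
static_assert((30 + 8 - 1) / 8 * 8 / 4 == 8);
static_assert(30 / 4 == 7);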
@@ -555,8 +530,6 @@
     using BiasType = OutputType;
     using WeightType = std::int8_t;

-    PreviousLayer previousLayer;
-
     alignas(CacheLineSize) BiasType biases[OutputDimensions];
     alignas(CacheLineSize) WeightType weights[OutputDimensions * PaddedInputDimensions];
   };