1- #ifndef SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H
2- #define SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H
1+ // This file contains a non-architecture-specific version of "flatten" used in stage1.
2+ // It is intended to be included multiple times and compiled multiple times.
3+ // We assume the file in which it is included already includes
4+ // "simdjson/stage1_find_marks.h" (this simplifies amalgamation)
35
6+ #ifdef TARGETED_ARCHITECTURE
7+ #ifdef TARGETED_REGION
8+
9+ TARGETED_REGION
410namespace simdjson {
511
612#ifdef SIMDJSON_NAIVE_FLATTEN // useful for benchmarking
713//
814// This is just a naive implementation. It should normally be
915// disabled, but can be used for research purposes to compare
1016// against our optimized version.
11- really_inline void flatten_bits (uint32_t *base_ptr, uint32_t &base,
12- uint32_t idx, uint64_t bits) {
17+ template <>
18+ really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint32_t &base,
19+ uint32_t idx, uint64_t bits) {
1320 uint32_t *out_ptr = base_ptr + base;
1421 idx -= 64 ;
1522 while (bits != 0 ) {
@@ -26,8 +33,9 @@ really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
2633// base_ptr[base] incrementing base as we go
2734// will potentially store extra values beyond end of valid bits, so base_ptr
2835// needs to be large enough to handle this
29- really_inline void flatten_bits (uint32_t *base_ptr, uint32_t &base,
30- uint32_t idx, uint64_t bits) {
36+ template <>
37+ really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint32_t &base,
38+ uint32_t idx, uint64_t bits) {
3139 // In some instances, the next branch is expensive because it is mispredicted.
3240 // Unfortunately, in other cases,
3341 // it helps tremendously.
@@ -88,6 +96,13 @@ really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
8896 base = next_base;
8997}
9098#endif // SIMDJSON_NAIVE_FLATTEN
99+
91100} // namespace simdjson
101+ UNTARGET_REGION
92102
93- #endif // SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H
103+ #else
104+ #error TARGETED_REGION must be specified before including.
105+ #endif // TARGETED_REGION
106+ #else
107+ #error TARGETED_ARCHITECTURE must be specified before including.
108+ #endif // TARGETED_ARCHITECTURE
0 commit comments