Skip to content

Commit f3c3afd

Browse files
jkeiserlemire
authored and committed
Use direct call to templated flatten_bits instead of if (simdjson#262)
* Use direct call to templated flatten_bits instead of if * Put really_inline back on find_structural_bits_64
1 parent 1e26859 commit f3c3afd

11 files changed

+47
-41
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck pointercheck
6262
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition distinctuseridcompetition allparserscheckfile allparsingcompetition
6363
SUPPLEMENTARYEXECUTABLES=parse_noutf8validation parse_nonumberparsing parse_nostringparsing
6464

65-
HEADERS= include/simdjson/simdutf8check_haswell.h include/simdjson/simdutf8check_westmere.h include/simdjson/simdutf8check_arm64.h include/simdjson/stringparsing.h include/simdjson/stringparsing_arm64.h include/simdjson/stringparsing_haswell.h include/simdjson/stringparsing_westmere.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage1_find_marks_arm64.h include/simdjson/stage1_find_marks_haswell.h include/simdjson/stage1_find_marks_westmere.h include/simdjson/stage2_build_tape.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h include/simdjson/stage1_find_marks_flatten.h include/simdjson/stage1_find_marks_flatten_haswell.h
65+
HEADERS= include/simdjson/simdutf8check_haswell.h include/simdjson/simdutf8check_westmere.h include/simdjson/simdutf8check_arm64.h include/simdjson/stringparsing.h include/simdjson/stringparsing_arm64.h include/simdjson/stringparsing_haswell.h include/simdjson/stringparsing_westmere.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage1_find_marks_arm64.h include/simdjson/stage1_find_marks_haswell.h include/simdjson/stage1_find_marks_westmere.h include/simdjson/stage2_build_tape.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h include/simdjson/stage1_find_marks_flatten_common.h include/simdjson/stage1_find_marks_flatten_haswell.h
6666
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/parsedjson.cpp src/parsedjsoniterator.cpp
6767
MINIFIERHEADERS=include/simdjson/jsonminifier.h include/simdjson/simdprune_tables.h
6868
MINIFIERLIBFILES=src/jsonminifier.cpp

amalgamation.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ $SCRIPTPATH/src/simdjson.cpp
1717
$SCRIPTPATH/src/jsonioutil.cpp
1818
$SCRIPTPATH/src/jsonminifier.cpp
1919
$SCRIPTPATH/src/jsonparser.cpp
20+
$SCRIPTPATH/include/simdjson/stage1_find_marks_flatten_haswell.h
2021
$SCRIPTPATH/src/stage1_find_marks.cpp
2122
$SCRIPTPATH/src/stage2_build_tape.cpp
2223
$SCRIPTPATH/src/parsedjson.cpp
@@ -41,8 +42,6 @@ $SCRIPTPATH/include/simdjson/simdutf8check_arm64.h
4142
$SCRIPTPATH/include/simdjson/jsonminifier.h
4243
$SCRIPTPATH/include/simdjson/parsedjson.h
4344
$SCRIPTPATH/include/simdjson/stage1_find_marks.h
44-
$SCRIPTPATH/include/simdjson/stage1_find_marks_flatten.h
45-
$SCRIPTPATH/include/simdjson/stage1_find_marks_flatten_haswell.h
4645
$SCRIPTPATH/include/simdjson/stage1_find_marks_westmere.h
4746
$SCRIPTPATH/include/simdjson/stage1_find_marks_haswell.h
4847
$SCRIPTPATH/include/simdjson/stage1_find_marks_arm64.h

include/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ set(SIMDJSON_INCLUDE
2020
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks.h
2121
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_arm64.h
2222
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_common.h
23-
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten.h
23+
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten_common.h
2424
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_flatten_haswell.h
2525
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_haswell.h
2626
${SIMDJSON_INCLUDE_DIR}/stage1_find_marks_westmere.h

include/simdjson/stage1_find_marks.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,15 @@ int find_structural_bits(const char *buf, size_t len,
116116
return find_structural_bits((const uint8_t *)buf, len, pj);
117117
}
118118

119+
// flatten out values in 'bits' assuming that they are to have values of idx
120+
// plus their position in the bitvector, and store these indexes at
121+
// base_ptr[base] incrementing base as we go
122+
// will potentially store extra values beyond end of valid bits, so base_ptr
123+
// needs to be large enough to handle this
124+
template <Architecture T = Architecture::NATIVE>
125+
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
126+
uint32_t idx, uint64_t bits);
127+
119128
} // namespace simdjson
120129

121130
#endif

include/simdjson/stage1_find_marks_arm64.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
#include "simdjson/simdutf8check_arm64.h"
55
#include "simdjson/stage1_find_marks.h"
6-
#include "simdjson/stage1_find_marks_flatten.h"
76

87
#ifdef IS_ARM64
98
namespace simdjson {

include/simdjson/stage1_find_marks_common.h

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ really_inline uint64_t find_quote_mask_and_bits<TARGETED_ARCHITECTURE>(
9292
}
9393

9494
// Find structural bits in a 64-byte chunk.
95-
void find_structural_bits_64(
95+
really_inline void find_structural_bits_64(
9696
const uint8_t *buf, size_t idx, uint32_t *base_ptr, uint32_t &base,
9797
uint64_t &prev_iter_ends_odd_backslash, uint64_t &prev_iter_inside_quote,
9898
uint64_t &prev_iter_ends_pseudo_pred, uint64_t &structurals,
@@ -113,12 +113,7 @@ void find_structural_bits_64(
113113
/* take the previous iterations structural bits, not our current
114114
* iteration,
115115
* and flatten */
116-
#ifdef IS_X86_64
117-
if (TARGETED_ARCHITECTURE == Architecture::HASWELL)
118-
simdjson::haswell::flatten_bits(base_ptr, base, idx, structurals);
119-
else
120-
#endif
121-
simdjson::flatten_bits(base_ptr, base, idx, structurals);
116+
flatten_bits<TARGETED_ARCHITECTURE>(base_ptr, base, idx, structurals);
122117

123118
uint64_t whitespace;
124119
find_whitespace_and_structurals<TARGETED_ARCHITECTURE>(in, whitespace,
@@ -200,12 +195,7 @@ int find_structural_bits<TARGETED_ARCHITECTURE>(const uint8_t *buf, size_t len,
200195

201196
/* finally, flatten out the remaining structurals from the last iteration
202197
*/
203-
#ifdef IS_X86_64
204-
if (TARGETED_ARCHITECTURE == Architecture::HASWELL)
205-
simdjson::haswell::flatten_bits(base_ptr, base, idx, structurals);
206-
else
207-
#endif
208-
simdjson::flatten_bits(base_ptr, base, idx, structurals);
198+
flatten_bits<TARGETED_ARCHITECTURE>(base_ptr, base, idx, structurals);
209199

210200
pj.n_structural_indexes = base;
211201
/* a valid JSON file cannot have zero structural indexes - we should have

include/simdjson/stage1_find_marks_flatten.h renamed to include/simdjson/stage1_find_marks_flatten_common.h

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,22 @@
1-
#ifndef SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H
2-
#define SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H
1+
// This file contains a non-architecture-specific version of "flatten" used in stage1.
2+
// It is intended to be included multiple times and compiled multiple times
3+
// We assume the file in which it is included already includes
4+
// "simdjson/stage1_find_marks.h" (this simplifies amalgamation)
35

6+
#ifdef TARGETED_ARCHITECTURE
7+
#ifdef TARGETED_REGION
8+
9+
TARGETED_REGION
410
namespace simdjson {
511

612
#ifdef SIMDJSON_NAIVE_FLATTEN // useful for benchmarking
713
//
814
// This is just a naive implementation. It should be normally
915
// disabled, but can be used for research purposes to compare
1016
// against our optimized version.
11-
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
12-
uint32_t idx, uint64_t bits) {
17+
template <>
18+
really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint32_t &base,
19+
uint32_t idx, uint64_t bits) {
1320
uint32_t *out_ptr = base_ptr + base;
1421
idx -= 64;
1522
while (bits != 0) {
@@ -26,8 +33,9 @@ really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
2633
// base_ptr[base] incrementing base as we go
2734
// will potentially store extra values beyond end of valid bits, so base_ptr
2835
// needs to be large enough to handle this
29-
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
30-
uint32_t idx, uint64_t bits) {
36+
template<>
37+
really_inline void flatten_bits<TARGETED_ARCHITECTURE>(uint32_t *base_ptr, uint32_t &base,
38+
uint32_t idx, uint64_t bits) {
3139
// In some instances, the next branch is expensive because it is mispredicted.
3240
// Unfortunately, in other cases,
3341
// it helps tremendously.
@@ -88,6 +96,13 @@ really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
8896
base = next_base;
8997
}
9098
#endif // SIMDJSON_NAIVE_FLATTEN
99+
91100
} // namespace simdjson
101+
UNTARGET_REGION
92102

93-
#endif // SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H
103+
#else
104+
#error TARGETED_REGION must be specified before including.
105+
#endif // TARGETED_REGION
106+
#else
107+
#error TARGETED_ARCHITECTURE must be specified before including.
108+
#endif // TARGETED_ARCHITECTURE

include/simdjson/stage1_find_marks_flatten_haswell.h

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
1-
#ifndef SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_HASWELL_H
2-
#define SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_HASWELL_H
3-
41
// This file provides the same function as
5-
// stage1_find_marks_flatten.h, but uses Intel intrinsics.
2+
// stage1_find_marks_flatten_common.h, but uses Intel intrinsics.
63
// This should provide better performance on Visual Studio
74
// and other compilers that do a conservative optimization.
85

@@ -20,15 +17,15 @@
2017

2118
TARGET_HASWELL
2219
namespace simdjson {
23-
namespace haswell {
2420

2521
// flatten out values in 'bits' assuming that they are to have values of idx
2622
// plus their position in the bitvector, and store these indexes at
2723
// base_ptr[base] incrementing base as we go
2824
// will potentially store extra values beyond end of valid bits, so base_ptr
2925
// needs to be large enough to handle this
30-
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
31-
uint32_t idx, uint64_t bits) {
26+
template<>
27+
really_inline void flatten_bits<Architecture::HASWELL>(uint32_t *base_ptr, uint32_t &base,
28+
uint32_t idx, uint64_t bits) {
3229
// In some instances, the next branch is expensive because it is mispredicted.
3330
// Unfortunately, in other cases,
3431
// it helps tremendously.
@@ -88,8 +85,6 @@ really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
8885
}
8986
base = next_base;
9087
}
91-
} // namespace haswell
9288
} // namespace simdjson
9389
UNTARGET_REGION
9490
#endif // IS_X86_64
95-
#endif // SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H

include/simdjson/stage1_find_marks_haswell.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
#include "simdjson/simdutf8check_haswell.h"
55
#include "simdjson/stage1_find_marks.h"
6-
#include "simdjson/stage1_find_marks_flatten_haswell.h"
76

87
#ifdef IS_X86_64
98

include/simdjson/stage1_find_marks_westmere.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
#include "simdjson/simdutf8check_westmere.h"
55
#include "simdjson/stage1_find_marks.h"
6-
#include "simdjson/stage1_find_marks_flatten.h"
76

87
#ifdef IS_X86_64
98

0 commit comments

Comments
 (0)