Skip to content

Commit eba02dc

Browse files
lemireioioioio
authored and committed
Runtime dispatch
* Attempt 1 - fn targeting GCC won't work with templates with different targets, need to specialize all the way up the call stack. * Compiles properly with cmake. Does not with the Makefile. * Compilation works with Makefile * instruction_set changes to architecture * some aesthetic changes * fix amalgation and tests + aesthetic changes * This now compiles and passes tests under CLANG * Minor correction. * Trying to make it work on ARM * Adding missing namespace * Missing bracket * Fixing minor compilation issues. * Getting parse to use runtime dispatch * Fixing amalgamation script. * Making sure that NEON is supported. * Fixing typo * Merging simdjson#229 * Manual merge of simdjson#229 by @jkeiser (second part) * Trying another way. * Removing the paral. * Fixing the make file * Let us make the practice run long enough. * Resolved the awful slowness. * Cleaning the README.md * With runtime dispatching, we should not need flags anymore. * Changing isa detection file's name + fixing typos.
1 parent bcabdfc commit eba02dc

39 files changed

+4344
-3598
lines changed

.drone.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,6 @@ steps:
55
- name: test
66
image: gcc:8
77
commands:
8-
- make -j2
9-
- make quiettest -j2
8+
- make
9+
- make quiettest
1010
- make amalgamate

Makefile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ architecture:=$(shell arch)
1818
ifeq ($(architecture),aarch64)
1919
ARCHFLAGS ?= -march=armv8-a+crc+crypto
2020
else
21-
ARCHFLAGS ?= -march=native
21+
ARCHFLAGS ?= -msse4.2 -mpclmul # lowest supported feature set?
2222
endif
2323

2424
CXXFLAGS = $(ARCHFLAGS) -std=c++17 -Wall -Wextra -Wshadow -Iinclude -Ibenchmark/linux $(EXTRAFLAGS)
@@ -62,8 +62,8 @@ TESTEXECUTABLES=jsoncheck numberparsingcheck stringparsingcheck pointercheck
6262
COMPARISONEXECUTABLES=minifiercompetition parsingcompetition parseandstatcompetition distinctuseridcompetition allparserscheckfile allparsingcompetition
6363
SUPPLEMENTARYEXECUTABLES=parse_noutf8validation parse_nonumberparsing parse_nostringparsing
6464

65-
HEADERS= include/simdjson/simdutf8check.h include/simdjson/stringparsing.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage2_build_tape.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
66-
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/parsedjson.cpp src/parsedjsoniterator.cpp
65+
HEADERS= include/simdjson/simdutf8check_haswell.h include/simdjson/simdutf8check_westmere.h include/simdjson/simdutf8check_arm64.h include/simdjson/stringparsing.h include/simdjson/stringparsing_arm64.h include/simdjson/stringparsing_haswell.h include/simdjson/stringparsing_macros.h include/simdjson/stringparsing_westmere.h include/simdjson/numberparsing.h include/simdjson/jsonparser.h include/simdjson/common_defs.h include/simdjson/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdjson/parsedjson.h include/simdjson/stage1_find_marks.h include/simdjson/stage1_find_marks_arm64.h include/simdjson/stage1_find_marks_haswell.h include/simdjson/stage1_find_marks_westmere.h include/simdjson/stage1_find_marks_macros.h include/simdjson/stage2_build_tape.h include/simdjson/jsoncharutils.h include/simdjson/jsonformatutils.h
66+
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/stage2_build_tape.cpp src/parsedjson.cpp src/parsedjsoniterator.cpp
6767
MINIFIERHEADERS=include/simdjson/jsonminifier.h include/simdjson/simdprune_tables.h
6868
MINIFIERLIBFILES=src/jsonminifier.cpp
6969

@@ -152,7 +152,7 @@ stringparsingcheck:tests/stringparsingcheck.cpp $(HEADERS) $(LIBFILES)
152152
$(CXX) $(CXXFLAGS) -o stringparsingcheck tests/stringparsingcheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/parsedjson.cpp -I. $(LIBFLAGS) -DJSON_TEST_STRINGS
153153

154154
pointercheck:tests/pointercheck.cpp $(HEADERS) $(LIBFILES)
155-
$(CXX) $(CXXFLAGS) -o pointercheck tests/pointercheck.cpp src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/parsedjson.cpp src/parsedjsoniterator.cpp -I. $(LIBFLAGS)
155+
$(CXX) $(CXXFLAGS) -o pointercheck tests/pointercheck.cpp src/stage2_build_tape.cpp src/jsonioutil.cpp src/jsonparser.cpp src/simdjson.cpp src/stage1_find_marks.cpp src/parsedjson.cpp src/parsedjsoniterator.cpp -I. $(LIBFLAGS)
156156

157157
minifiercompetition: benchmark/minifiercompetition.cpp $(HEADERS) submodules $(MINIFIERHEADERS) $(LIBFILES) $(MINIFIERLIBFILES)
158158
$(CXX) $(CXXFLAGS) -o minifiercompetition $(LIBFILES) $(MINIFIERLIBFILES) benchmark/minifiercompetition.cpp -I. $(LIBFLAGS) $(COREDEPSINCLUDE)

README.md

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ On a Skylake processor, the parsing speeds (in GB/s) of various processors on th
5454
- We support platforms like Linux or macOS, as well as Windows through Visual Studio 2017 or later.
5555
- A processor with
5656
- AVX2 (i.e., Intel processors starting with the Haswell microarchitecture released 2013 and AMD processors starting with the Zen microarchitecture released 2017),
57-
- or SSE 4.2 (i.e., Intel processors going back to Nehalem released in 2008 or AMD processors starting with the Jaguar used in the PS4 and XBox One)
57+
- or SSE 4.2 and CLMUL (i.e., Intel processors going back to Westmere released in 2010 or AMD processors starting with the Jaguar used in the PS4 and XBox One)
5858
- or a 64-bit ARM processor (ARMv8-A): this covers a wide range of mobile processors, including all Apple processors currently available for sale, going back as far back as the iPhone 5s (2013).
5959
- A recent C++ compiler (e.g., GNU GCC or LLVM CLANG or Visual Studio 2017), we assume C++17. GNU GCC 7 or better or LLVM's clang 6 or better.
6060
- Some benchmark scripts assume bash and other common utilities, but they are optional.
@@ -174,16 +174,19 @@ int main(int argc, char *argv[]) {
174174
}
175175
```
176176
177-
On Intel and AMD processors, we get best performance by using the hardware support for AVX2 instructions. You have to make sure that you instruct your
178-
compiler to use these instructions as needed. Under compilers such as GNU GCC or LLVM clang, the
179-
flag `-march=native` used on a recent Intel processor (Haswell or better) is sufficient. For portability
180-
of the binary files you can also specify directly the Haswell processor (`-march=haswell`). You may
181-
also use the flags `-mavx2 -mbmi2`. Under Visual Studio, you need to target x64 and add the
182-
flag `/arch:AVX2`.
183-
184177
185178
Note: In some settings, it might be desirable to precompile `simdjson.cpp` instead of including it.
186179
180+
## Runtime dispatch
181+
182+
On Intel and AMD processors, we get best performance by using the hardware support for AVX2 instructions. However, simdjson also
183+
runs on older Intel and AMD processors. We require a minimum feature support of SSE 4.2 and CLMUL (2010 Intel Westmere or better).
184+
The code automatically detects the feature set of your processor and switches to the right function at runtime (a technique
185+
sometimes called runtime dispatch).
186+
187+
188+
We also support 64-bit ARM. We assume NEON support, and if the cryptographic extension is available, we leverage it, at compile-time.
189+
There is no runtime dispatch on ARM.
187190
188191
189192
## Usage (old-school Makefile on platforms like Linux or macOS)
@@ -266,13 +269,13 @@ make test
266269
267270
## Usage (CMake on Windows using Visual Studio)
268271
269-
We assume you have a common Windows PC with at least Visual Studio 2017 and an x64 processor with AVX2 support (2013 Intel Haswell or later) or SSE 4.2 (2008 Nehalem or later).
272+
We assume you have a common Windows PC with at least Visual Studio 2017 and an x64 processor with AVX2 support (2013 Intel Haswell or later) or SSE 4.2 + CLMUL (2010 Westmere or later).
270273
271274
- Grab the simdjson code from GitHub, e.g., by cloning it using [GitHub Desktop](https://desktop.github.com/).
272275
- Install [CMake](https://cmake.org/download/). When you install it, make sure to ask that `cmake` be made available from the command line. Please choose a recent version of cmake.
273276
- Create a subdirectory within simdjson, such as `VisualStudio`.
274277
- Using a shell, go to this newly created directory.
275-
- Type `cmake -DCMAKE_GENERATOR_PLATFORM=x64 ..` in the shell while in the `VisualStudio` repository. (Alternatively, if you want to build a DLL, you may use the command line `cmake -DCMAKE_GENERATOR_PLATFORM=x64 -DSIMDJSON_BUILD_STATIC=OFF ..`.) This will build the code with AVX2 instructions. If your target processor does not support AVX2, you need to replace `cmake -DCMAKE_GENERATOR_PLATFORM=x64 ..` by `cmake -DSIMDJSON_DISABLE_AVX=on -DCMAKE_GENERATOR_PLATFORM=x64 ..` . That is, you need to set the flag to forcefully disable AVX support since we compile with AVX2 instructions *by default*.
278+
- Type `cmake -DCMAKE_GENERATOR_PLATFORM=x64 ..` in the shell while in the `VisualStudio` directory. (Alternatively, if you want to build a DLL, you may use the command line `cmake -DCMAKE_GENERATOR_PLATFORM=x64 -DSIMDJSON_BUILD_STATIC=OFF ..`.)
276279
- This last command (`cmake ...`) created a Visual Studio solution file in the newly created directory (e.g., `simdjson.sln`). Open this file in Visual Studio. You should now be able to build the project and run the tests. For example, in the `Solution Explorer` window (available from the `View` menu), right-click `ALL_BUILD` and select `Build`. To test the code, still in the `Solution Explorer` window, select `RUN_TESTS` and select `Build`.
277280
278281
@@ -324,7 +327,6 @@ To simplify the engineering, we make some assumptions.
324327
325328
- We support UTF-8 (and thus ASCII), nothing else (no Latin, no UTF-16). We do not believe this is a genuine limitation, because we do not think there is any serious application that needs to process JSON data without an ASCII or UTF-8 encoding.
326329
- All strings in the JSON document may have up to 4294967295 bytes in UTF-8 (4GB). To enforce this constraint, we refuse to parse a document that contains more than 4294967295 bytes (4GB). This should accommodate most JSON documents.
327-
- We assume AVX2 support, which is available in all recent mainstream x86 processors produced by AMD and Intel. No support for non-x86 processors is included, though it can be done. We plan to support ARM processors (help is invited).
328330
- In cases of failure, we report a failure without any indication as to the nature of the problem. (This can be easily improved without affecting performance.)
329331
- As allowed by the specification, we allow repeated keys within an object (other parsers like sajson do the same).
330332
- Performance is optimized for JSON documents spanning from tens of kilobytes up to many megabytes: the performance issues with having to parse many tiny JSON documents or one truly enormous JSON document are different.

amalgamation.sh

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,31 @@ $SCRIPTPATH/src/parsedjsoniterator.cpp
2626
# order matters
2727
ALLCHEADERS="
2828
$SCRIPTPATH/include/simdjson/simdjson_version.h
29-
$SCRIPTPATH/include/simdjson/simdjson.h
3029
$SCRIPTPATH/include/simdjson/portability.h
30+
$SCRIPTPATH/include/simdjson/isadetection.h
31+
$SCRIPTPATH/include/simdjson/simdjson.h
3132
$SCRIPTPATH/include/simdjson/common_defs.h
3233
$SCRIPTPATH/include/simdjson/padded_string.h
3334
$SCRIPTPATH/include/simdjson/jsoncharutils.h
3435
$SCRIPTPATH/include/simdjson/jsonformatutils.h
3536
$SCRIPTPATH/include/simdjson/jsonioutil.h
3637
$SCRIPTPATH/include/simdjson/simdprune_tables.h
37-
$SCRIPTPATH/include/simdjson/simdutf8check.h
38-
$SCRIPTPATH/include/simdjson/simdutf8check_neon.h
38+
$SCRIPTPATH/include/simdjson/simdutf8check_haswell.h
39+
$SCRIPTPATH/include/simdjson/simdutf8check_westmere.h
40+
$SCRIPTPATH/include/simdjson/simdutf8check_arm64.h
3941
$SCRIPTPATH/include/simdjson/jsonminifier.h
4042
$SCRIPTPATH/include/simdjson/parsedjson.h
4143
$SCRIPTPATH/include/simdjson/stage1_find_marks.h
44+
$SCRIPTPATH/include/simdjson/stage1_find_marks_flatten.h
45+
$SCRIPTPATH/include/simdjson/stage1_find_marks_macros.h
46+
$SCRIPTPATH/include/simdjson/stage1_find_marks_westmere.h
47+
$SCRIPTPATH/include/simdjson/stage1_find_marks_haswell.h
48+
$SCRIPTPATH/include/simdjson/stage1_find_marks_arm64.h
4249
$SCRIPTPATH/include/simdjson/stringparsing.h
50+
$SCRIPTPATH/include/simdjson/stringparsing_macros.h
51+
$SCRIPTPATH/include/simdjson/stringparsing_westmere.h
52+
$SCRIPTPATH/include/simdjson/stringparsing_haswell.h
53+
$SCRIPTPATH/include/simdjson/stringparsing_arm64.h
4354
$SCRIPTPATH/include/simdjson/numberparsing.h
4455
$SCRIPTPATH/include/simdjson/stage2_build_tape.h
4556
$SCRIPTPATH/include/simdjson/jsonparser.h
@@ -125,16 +136,16 @@ echo "Giving final instructions:"
125136
CPPBIN=${DEMOCPP%%.*}
126137

127138
echo "Try :"
128-
echo "c++ -march=native -O3 -std=c++17 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} ../jsonexamples/twitter.json "
139+
echo "c++ -O3 -std=c++17 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} ../jsonexamples/twitter.json "
129140

130141
SINGLEHDR=$SCRIPTPATH/singleheader
131142
echo "Copying files to $SCRIPTPATH/singleheader "
132143
mkdir -p $SINGLEHDR
133-
echo "c++ -march=native -O3 -std=c++17 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} ../jsonexamples/twitter.json " > $SINGLEHDR/README.md
144+
echo "c++ -O3 -std=c++17 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} ../jsonexamples/twitter.json " > $SINGLEHDR/README.md
134145
cp ${AMAL_C} ${AMAL_H} ${DEMOCPP} $SINGLEHDR
135146
ls $SINGLEHDR
136147

137-
cd $SINGLEHDR && c++ -march=native -O3 -std=c++17 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} ../jsonexamples/twitter.json
148+
cd $SINGLEHDR && c++ -O3 -std=c++17 -o ${CPPBIN} ${DEMOCPP} && ./${CPPBIN} ../jsonexamples/twitter.json
138149

139150
lowercase(){
140151
echo "$1" | tr 'A-Z' 'a-z'

benchmark/minifiercompetition.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#include "rapidjson/writer.h"
1515
#include "sajson.h"
1616

17-
17+
using namespace simdjson;
1818
using namespace rapidjson;
1919

2020
std::string rapidstringmeInsitu(char *json) {

benchmark/parse.cpp

Lines changed: 96 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,90 @@
3333
#include "simdjson/parsedjson.h"
3434
#include "simdjson/stage1_find_marks.h"
3535
#include "simdjson/stage2_build_tape.h"
36+
#include "simdjson/isadetection.h"
37+
namespace simdjson {
38+
// Picks the most capable kernel family for the CPU this process runs on.
//
// The feature mask returned by detect_supported_architectures() is compared
// against the bits each implementation needs:
//   - architecture::haswell  : AVX2, PCLMULQDQ, BMI1 and BMI2 all reported
//   - architecture::westmere : SSE4.2 and PCLMULQDQ both reported
//   - architecture::arm64    : NEON reported
// Returns architecture::none when no set is fully reported.
architecture _find_best_supported_implementation() {
  constexpr uint32_t haswell_flags = SIMDExtensions::AVX2 | SIMDExtensions::PCLMULQDQ
                                   | SIMDExtensions::BMI1 | SIMDExtensions::BMI2;
  constexpr uint32_t westmere_flags = SIMDExtensions::SSE42 | SIMDExtensions::PCLMULQDQ;
  const uint32_t supports = detect_supported_architectures();
  // Order from best to worst (within architecture)
  if ((haswell_flags & supports) == haswell_flags) {
    return architecture::haswell;
  }
  if ((westmere_flags & supports) == westmere_flags) {
    return architecture::westmere;
  }
  // Test the detected mask rather than the bare SIMDExtensions::NEON constant:
  // the constant does not depend on the running CPU, so checking it alone
  // reported arm64 for every processor that failed the x86 checks above and
  // made the architecture::none result unreachable.
  // NOTE(review): relies on detect_supported_architectures() setting the NEON
  // bit on ARM64 builds -- confirm in isadetection.h.
  if (supports & SIMDExtensions::NEON) {
    return architecture::arm64;
  }
  return architecture::none;
}
54+
55+
56+
using unified_functype = int (const uint8_t *buf, size_t len, ParsedJson &pj);
57+
using stage1_functype = int (const uint8_t *buf, size_t len, ParsedJson &pj);
58+
59+
60+
extern unified_functype *unified_ptr;
61+
62+
extern stage1_functype *stage1_ptr;
63+
64+
int unified_machine_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj) {
65+
architecture best_implementation = _find_best_supported_implementation();
66+
// Selecting the best implementation
67+
switch (best_implementation) {
68+
#ifdef IS_X86_64
69+
case architecture::haswell:
70+
unified_ptr = &unified_machine<architecture::haswell>;
71+
break;
72+
case architecture::westmere:
73+
unified_ptr = &unified_machine<architecture::westmere>;
74+
break;
75+
#endif
76+
#ifdef IS_ARM64
77+
case architecture::arm64:
78+
unified_ptr = &unified_machine<architecture::arm64>;
79+
break;
80+
#endif
81+
default :
82+
std::cerr << "The processor is not supported by simdjson." << std::endl;
83+
return simdjson::UNEXPECTED_ERROR;
84+
}
85+
86+
return unified_ptr(buf, len, pj);
87+
}
88+
89+
// Responsible to select the best json_parse implementation
90+
int find_structural_bits_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj) {
91+
architecture best_implementation = _find_best_supported_implementation();
92+
// Selecting the best implementation
93+
switch (best_implementation) {
94+
#ifdef IS_X86_64
95+
case architecture::haswell:
96+
stage1_ptr = &find_structural_bits<architecture::haswell>;
97+
break;
98+
case architecture::westmere:
99+
stage1_ptr = &find_structural_bits<architecture::westmere>;
100+
break;
101+
#endif
102+
#ifdef IS_ARM64
103+
case architecture::arm64:
104+
stage1_ptr = &find_structural_bits<architecture::arm64>;
105+
break;
106+
#endif
107+
default :
108+
std::cerr << "The processor is not supported by simdjson." << std::endl;
109+
return simdjson::UNEXPECTED_ERROR;
110+
}
111+
112+
return stage1_ptr(buf, len, pj);
113+
}
114+
115+
stage1_functype *stage1_ptr = &find_structural_bits_dispatch;
116+
unified_functype *unified_ptr = &unified_machine_dispatch;
117+
}
118+
119+
36120

37121
int main(int argc, char *argv[]) {
38122
bool verbose = false;
@@ -104,7 +188,14 @@ int main(int argc, char *argv[]) {
104188
printf("justdata (-t) flag only works under linux.\n");
105189
}
106190
#endif
107-
191+
{ // practice run
  // Calling through stage1_ptr / unified_ptr once here lets the dispatch
  // trampolines replace both pointers with the selected implementations
  // before the measured calls below.
  simdjson::ParsedJson pj;
  bool allocok = pj.allocateCapacity(p.size());
  if (allocok) {
    simdjson::stage1_ptr((const uint8_t *)p.data(), p.size(), pj);
    // A single cast is enough; the original stacked the same cast eight times.
    simdjson::unified_ptr((const uint8_t *)p.data(), p.size(), pj);
  }
}
108199
#ifndef SQUASH_COUNTERS
109200
std::vector<int> evts;
110201
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
@@ -144,8 +235,7 @@ int main(int argc, char *argv[]) {
144235
std::cout << "[verbose] allocated memory for parsed JSON " << std::endl;
145236
}
146237
unified.start();
147-
// The default template is simdjson::instruction_set::native.
148-
isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
238+
isok = (simdjson::stage1_ptr((const uint8_t*)p.data(), p.size(), pj) == simdjson::SUCCESS);
149239
unified.end(results);
150240
cy1 += results[0];
151241
cl1 += results[1];
@@ -157,8 +247,7 @@ int main(int argc, char *argv[]) {
157247
break;
158248
}
159249
unified.start();
160-
// The default template is simdjson::instruction_set::native.
161-
isok = isok && (simdjson::SUCCESS == simdjson::unified_machine<>(p.data(), p.size(), pj));
250+
isok = isok && (simdjson::SUCCESS == simdjson::unified_ptr((const uint8_t*)p.data(), p.size(), pj));
162251
unified.end(results);
163252
cy2 += results[0];
164253
cl2 += results[1];
@@ -187,9 +276,8 @@ int main(int argc, char *argv[]) {
187276
}
188277

189278
auto start = std::chrono::steady_clock::now();
190-
// The default template is simdjson::instruction_set::native.
191-
isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
192-
isok = isok && (simdjson::SUCCESS == simdjson::unified_machine<>(p.data(), p.size(), pj));
279+
isok = (simdjson::stage1_ptr((const uint8_t*)p.data(), p.size(), pj) == simdjson::SUCCESS);
280+
isok = isok && (simdjson::SUCCESS == simdjson::unified_ptr((const uint8_t*)p.data(), p.size(), pj));
193281
auto end = std::chrono::steady_clock::now();
194282
std::chrono::duration<double> secs = end - start;
195283
res[i] = secs.count();

benchmark/parseandstatcompetition.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
#include "sajson.h"
1313

1414
using namespace rapidjson;
15-
15+
using namespace simdjson;
1616
struct stat_s {
1717
size_t number_count;
1818
size_t object_count;

benchmark/statisticalmodel.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ int main(int argc, char *argv[]) {
180180
results.resize(evts.size());
181181
for (uint32_t i = 0; i < iterations; i++) {
182182
unified.start();
183-
// The default template is simdjson::instruction_set::native.
183+
// The default template is simdjson::architecture::native.
184184
bool isok = (simdjson::find_structural_bits<>(p.data(), p.size(), pj) == simdjson::SUCCESS);
185185
unified.end(results);
186186

include/simdjson/common_defs.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
#define ISALIGNED_N(ptr, n) (((uintptr_t)(ptr) & ((n)-1)) == 0)
3232

3333
#ifdef _MSC_VER
34-
#define really_inline inline
34+
#define really_inline __forceinline
3535
#define never_inline __declspec(noinline)
3636

3737
#define UNUSED
@@ -44,20 +44,26 @@
4444
#define unlikely(x) x
4545
#endif
4646

47+
// For Visual Studio compilers, same-page buffer overrun is not fine.
48+
#define ALLOW_SAME_PAGE_BUFFER_OVERRUN false
49+
4750
#else
4851

4952
// For non-Visual Studio compilers, we may assume that same-page buffer overrun is fine.
5053
// However, it will make it difficult to be "valgrind clean".
5154
//#ifndef ALLOW_SAME_PAGE_BUFFER_OVERRUN
52-
//#define ALLOW_SAME_PAGE_BUFFER_OVERRUN
55+
//#define ALLOW_SAME_PAGE_BUFFER_OVERRUN true
56+
//#else
57+
#define ALLOW_SAME_PAGE_BUFFER_OVERRUN false
5358
//#endif
5459

5560
// The following is likely unnecessarily complex.
5661
#ifdef __SANITIZE_ADDRESS__
5762
// we have GCC, stuck with https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
58-
#undef ALLOW_SAME_PAGE_BUFFER_OVERRUN
63+
#define ALLOW_SAME_PAGE_BUFFER_OVERRUN false
5964
#elif defined(__has_feature)
6065
// we have CLANG?
66+
// todo: if we're setting ALLOW_SAME_PAGE_BUFFER_OVERRUN to false, why do we have a non-empty qualifier?
6167
# if (__has_feature(address_sanitizer))
6268
#define ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER __attribute__((no_sanitize("address")))
6369
# endif

0 commit comments

Comments
 (0)