Skip to content

Commit ffaa292

Browse files
authored
Master vs2019 x86 compile fixes (simdjson#743)
* Added bit-exact implementations of _BitScanForward64 and _BitScanReverse64 for VS2019 32-bit builds
* Added bit-exact implementations of _umul128 for VS2019 x86, arm, arm64 builds
* Implement mul_overflow for VS2019 arm64 builds + implement mul_overflow using __umulh (msvc/clang results: https://godbolt.org/z/smRwA7)
* Added Win32 for VS2019 to .appveyor.yml
* Update amalgamated headers (fix x86 builds with VS2019)
1 parent fbf274a commit ffaa292

File tree

5 files changed

+79
-20
lines changed

5 files changed

+79
-20
lines changed

.appveyor.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,19 @@ environment:
1212
matrix:
1313
- SIMDJSON_BUILD_STATIC: ON
1414
SIMDJSON_ENABLE_THREADS: OFF
15+
SIMDJSON_PLATFORM: x64
16+
- SIMDJSON_BUILD_STATIC: ON
17+
SIMDJSON_ENABLE_THREADS: OFF
18+
SIMDJSON_PLATFORM: Win32
1519
- SIMDJSON_BUILD_STATIC: OFF
1620
SIMDJSON_ENABLE_THREADS: ON
21+
SIMDJSON_PLATFORM: x64
1722

1823
build_script:
1924
- set
2025
- mkdir build
2126
- cd build
22-
- cmake -DSIMDJSON_BUILD_STATIC=%SIMDJSON_BUILD_STATIC% -DSIMDJSON_ENABLE_THREADS=%SIMDJSON_ENABLE_THREADS% -DCMAKE_BUILD_TYPE=%Configuration% -DCMAKE_GENERATOR_PLATFORM=x64 ..
27+
- cmake -DSIMDJSON_BUILD_STATIC=%SIMDJSON_BUILD_STATIC% -DSIMDJSON_ENABLE_THREADS=%SIMDJSON_ENABLE_THREADS% -DCMAKE_BUILD_TYPE=%Configuration% -DCMAKE_GENERATOR_PLATFORM=%SIMDJSON_PLATFORM% ..
2328
- cmake -LH ..
2429
- cmake --build . --config %Configuration% --verbose
2530

singleheader/simdjson.cpp

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -697,16 +697,10 @@ really_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *resu
697697
#endif
698698
}
699699

700-
#ifdef _MSC_VER
701-
#pragma intrinsic(_umul128) // todo: this might fail under visual studio for ARM
702-
#endif
703-
704700
really_inline bool mul_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
705701
#ifdef _MSC_VER
706-
// todo: this might fail under visual studio for ARM
707-
uint64_t high;
708-
*result = _umul128(value1, value2, &high);
709-
return high;
702+
*result = value1 * value2;
703+
return !!__umulh(value1, value2);
710704
#else
711705
return __builtin_umulll_overflow(value1, value2, (unsigned long long *)result);
712706
#endif
@@ -5759,7 +5753,7 @@ inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
57595753
// The following code is used in number parsing. It is not
57605754
// properly "char utils" stuff, but we move it here so that
57615755
// it does not get copied multiple times in the binaries (once
5762-
// per instructin set).
5756+
// per instruction set).
57635757
///
57645758

57655759

@@ -5771,10 +5765,26 @@ struct value128 {
57715765
uint64_t high;
57725766
};
57735767

5768+
// Software fallback for _umul128, compiled only for MSVC builds that are not
// targeting x64 (the guard below excludes _M_X64).
#if defined(_MSC_VER) && !defined(_M_X64) // _umul128 for x86, arm, arm64
5769+
// NOTE(review): __emulu is only defined here when _M_ARM is set; the other
// targets that reach this branch are presumably expected to get __emulu from
// the toolchain -- confirm this holds for arm64.
#if defined(_M_ARM)
5770+
// 32x32 -> 64-bit unsigned multiply: y is widened to 64 bits before the
// multiplication so the full product is kept.
static inline uint64_t __emulu(uint32_t x, uint32_t y) {
5771+
return x * (uint64_t)y;
5772+
}
5773+
#endif
5774+
// Full 64x64 -> 128-bit unsigned multiply built from four 32x32 -> 64 partial
// products. Returns the low 64 bits of ab * cd and stores the high 64 bits
// in *hi.
// Naming: ab = (a << 32) | b and cd = (c << 32) | d, so that
//   ab * cd = (a*c << 64) + ((a*d + b*c) << 32) + b*d.
static inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
5775+
uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd);
5776+
uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd);
5777+
// Sum of the two middle partial products; this addition can wrap around 2^64.
uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32));
5778+
// 1 if the middle sum wrapped (the result is smaller than an addend), else 0.
uint64_t adbc_carry = !!(adbc < ad);
5779+
// Low word: b*d plus the low 32 bits of the middle sum shifted into the upper half.
uint64_t lo = bd + (adbc << 32);
5780+
// High word: a*c, plus the upper 32 bits of the middle sum, plus the wrapped
// middle carry (worth 2^32 in the high word), plus the carry out of the low
// word addition (detected by lo < bd).
*hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + (adbc_carry << 32) + !!(lo < bd);
5781+
return lo;
5782+
}
5783+
#endif
5784+
57745785
really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) {
57755786
value128 answer;
57765787
#ifdef _MSC_VER
5777-
// todo: this might fail under visual studio for ARM
57785788
answer.low = _umul128(value1, value2, &answer.high);
57795789
#else
57805790
__uint128_t r = ((__uint128_t)value1) * value2;
@@ -8559,6 +8569,23 @@ WARN_UNUSED really_inline uint8_t *parse_string(const uint8_t *src, uint8_t *dst
85598569
namespace simdjson {
85608570
namespace fallback {
85618571

8572+
#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64)
8573+
static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) {
8574+
unsigned long x0 = (unsigned long)x, top, bottom;
8575+
_BitScanForward(&top, (unsigned long)(x >> 32));
8576+
_BitScanForward(&bottom, x0);
8577+
*ret = x0 ? bottom : 32 + top;
8578+
return x != 0;
8579+
}
8580+
static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) {
8581+
unsigned long x1 = (unsigned long)(x >> 32), top, bottom;
8582+
_BitScanReverse(&top, x1);
8583+
_BitScanReverse(&bottom, (unsigned long)x);
8584+
*ret = x1 ? top + 32 : bottom;
8585+
return x != 0;
8586+
}
8587+
#endif
8588+
85628589
// We sometimes call trailing_zero on inputs that are zero,
85638590
// but the algorithms do not end up using the returned value.
85648591
// Sadly, sanitizers are not smart enough to figure it out.

src/arm64/bitmanipulation.h

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -59,16 +59,10 @@ really_inline bool add_overflow(uint64_t value1, uint64_t value2, uint64_t *resu
5959
#endif
6060
}
6161

62-
#ifdef _MSC_VER
63-
#pragma intrinsic(_umul128) // todo: this might fail under visual studio for ARM
64-
#endif
65-
6662
really_inline bool mul_overflow(uint64_t value1, uint64_t value2, uint64_t *result) {
6763
#ifdef _MSC_VER
68-
// todo: this might fail under visual studio for ARM
69-
uint64_t high;
70-
*result = _umul128(value1, value2, &high);
71-
return high;
64+
*result = value1 * value2;
65+
return !!__umulh(value1, value2);
7266
#else
7367
return __builtin_umulll_overflow(value1, value2, (unsigned long long *)result);
7468
#endif

src/fallback/bitmanipulation.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,23 @@
77
namespace simdjson {
88
namespace fallback {
99

10+
#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64)
11+
static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) {
12+
unsigned long x0 = (unsigned long)x, top, bottom;
13+
_BitScanForward(&top, (unsigned long)(x >> 32));
14+
_BitScanForward(&bottom, x0);
15+
*ret = x0 ? bottom : 32 + top;
16+
return x != 0;
17+
}
18+
static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) {
19+
unsigned long x1 = (unsigned long)(x >> 32), top, bottom;
20+
_BitScanReverse(&top, x1);
21+
_BitScanReverse(&bottom, (unsigned long)x);
22+
*ret = x1 ? top + 32 : bottom;
23+
return x != 0;
24+
}
25+
#endif
26+
1027
// We sometimes call trailing_zero on inputs that are zero,
1128
// but the algorithms do not end up using the returned value.
1229
// Sadly, sanitizers are not smart enough to figure it out.

src/jsoncharutils.h

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,10 +324,26 @@ struct value128 {
324324
uint64_t high;
325325
};
326326

327+
// Software fallback for _umul128, compiled only for MSVC builds that are not
// targeting x64 (the guard below excludes _M_X64).
#if defined(_MSC_VER) && !defined(_M_X64) // _umul128 for x86, arm, arm64
328+
// NOTE(review): __emulu is only defined here when _M_ARM is set; the other
// targets that reach this branch are presumably expected to get __emulu from
// the toolchain -- confirm this holds for arm64.
#if defined(_M_ARM)
329+
// 32x32 -> 64-bit unsigned multiply: y is widened to 64 bits before the
// multiplication so the full product is kept.
static inline uint64_t __emulu(uint32_t x, uint32_t y) {
330+
return x * (uint64_t)y;
331+
}
332+
#endif
333+
// Full 64x64 -> 128-bit unsigned multiply built from four 32x32 -> 64 partial
// products. Returns the low 64 bits of ab * cd and stores the high 64 bits
// in *hi.
// Naming: ab = (a << 32) | b and cd = (c << 32) | d, so that
//   ab * cd = (a*c << 64) + ((a*d + b*c) << 32) + b*d.
static inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
334+
uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd);
335+
uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd);
336+
// Sum of the two middle partial products; this addition can wrap around 2^64.
uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32));
337+
// 1 if the middle sum wrapped (the result is smaller than an addend), else 0.
uint64_t adbc_carry = !!(adbc < ad);
338+
// Low word: b*d plus the low 32 bits of the middle sum shifted into the upper half.
uint64_t lo = bd + (adbc << 32);
339+
// High word: a*c, plus the upper 32 bits of the middle sum, plus the wrapped
// middle carry (worth 2^32 in the high word), plus the carry out of the low
// word addition (detected by lo < bd).
*hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + (adbc_carry << 32) + !!(lo < bd);
340+
return lo;
341+
}
342+
#endif
343+
327344
really_inline value128 full_multiplication(uint64_t value1, uint64_t value2) {
328345
value128 answer;
329346
#ifdef _MSC_VER
330-
// todo: this might fail under visual studio for ARM
331347
answer.low = _umul128(value1, value2, &answer.high);
332348
#else
333349
__uint128_t r = ((__uint128_t)value1) * value2;

0 commit comments

Comments
 (0)