@@ -25,77 +25,60 @@ static really_inline void find_whitespace_and_structurals(simd_input<ARCHITECTUR
2525 uint64_t &whitespace, uint64_t &structurals) {
2626
2727 #ifdef SIMDJSON_NAIVE_STRUCTURAL
28- // You should never need this naive approach, but it can be useful
29- // for research purposes
30- const __m256i mask_open_brace = _mm256_set1_epi8 (0x7b );
31- __m256i struct_lo = _mm256_cmpeq_epi8 (in.lo , mask_open_brace);
32- __m256i struct_hi = _mm256_cmpeq_epi8 (in.hi , mask_open_brace);
33- const __m256i mask_close_brace = _mm256_set1_epi8 (0x7d );
34- struct_lo = _mm256_or_si256 (struct_lo, _mm256_cmpeq_epi8 (in.lo , mask_close_brace));
35- struct_hi = _mm256_or_si256 (struct_hi, _mm256_cmpeq_epi8 (in.hi , mask_close_brace));
36- const __m256i mask_open_bracket = _mm256_set1_epi8 (0x5b );
37- struct_lo = _mm256_or_si256 (struct_lo, _mm256_cmpeq_epi8 (in.lo , mask_open_bracket));
38- struct_hi = _mm256_or_si256 (struct_hi, _mm256_cmpeq_epi8 (in.hi , mask_open_bracket));
39- const __m256i mask_close_bracket = _mm256_set1_epi8 (0x5d );
40- struct_lo = _mm256_or_si256 (struct_lo, _mm256_cmpeq_epi8 (in.lo , mask_close_bracket));
41- struct_hi = _mm256_or_si256 (struct_hi, _mm256_cmpeq_epi8 (in.hi , mask_close_bracket));
42- const __m256i mask_column = _mm256_set1_epi8 (0x3a );
43- struct_lo = _mm256_or_si256 (struct_lo, _mm256_cmpeq_epi8 (in.lo , mask_column));
44- struct_hi = _mm256_or_si256 (struct_hi, _mm256_cmpeq_epi8 (in.hi , mask_column));
45- const __m256i mask_comma = _mm256_set1_epi8 (0x2c );
46- struct_lo = _mm256_or_si256 (struct_lo, _mm256_cmpeq_epi8 (in.lo , mask_comma));
47- struct_hi = _mm256_or_si256 (struct_hi, _mm256_cmpeq_epi8 (in.hi , mask_comma));
48- uint64_t structural_res_0 = static_cast <uint32_t >(_mm256_movemask_epi8 (struct_lo));
49- uint64_t structural_res_1 = _mm256_movemask_epi8 (struct_hi);
50- structurals = (structural_res_0 | (structural_res_1 << 32 ));
51-
52- const __m256i mask_space = _mm256_set1_epi8 (0x20 );
53- __m256i space_lo = _mm256_cmpeq_epi8 (in.lo , mask_space);
54- __m256i space_hi = _mm256_cmpeq_epi8 (in.hi , mask_space);
55- const __m256i mask_linefeed = _mm256_set1_epi8 (0x0a );
56- space_lo = _mm256_or_si256 (space_lo, _mm256_cmpeq_epi8 (in.lo , mask_linefeed));
57- space_hi = _mm256_or_si256 (space_hi, _mm256_cmpeq_epi8 (in.hi , mask_linefeed));
58- const __m256i mask_tab = _mm256_set1_epi8 (0x09 );
59- space_lo = _mm256_or_si256 (space_lo, _mm256_cmpeq_epi8 (in.lo , mask_tab));
60- space_hi = _mm256_or_si256 (space_hi, _mm256_cmpeq_epi8 (in.hi , mask_tab));
61- const __m256i mask_carriage = _mm256_set1_epi8 (0x0d );
62- space_lo = _mm256_or_si256 (space_lo, _mm256_cmpeq_epi8 (in.lo , mask_carriage));
63- space_hi = _mm256_or_si256 (space_hi, _mm256_cmpeq_epi8 (in.hi , mask_carriage));
64-
65- uint64_t ws_res_0 = static_cast <uint32_t >(_mm256_movemask_epi8 (space_lo));
66- uint64_t ws_res_1 = _mm256_movemask_epi8 (space_hi);
67- whitespace = (ws_res_0 | (ws_res_1 << 32 ));
68- // end of naive approach
28+
29+ // You should never need this naive approach, but it can be useful
30+ // for research purposes
31+ const __m256i mask_open_brace = _mm256_set1_epi8 (0x7b );
32+ const __m256i mask_close_brace = _mm256_set1_epi8 (0x7d );
33+ const __m256i mask_open_bracket = _mm256_set1_epi8 (0x5b );
34+ const __m256i mask_close_bracket = _mm256_set1_epi8 (0x5d );
35+ const __m256i mask_column = _mm256_set1_epi8 (0x3a );
36+ const __m256i mask_comma = _mm256_set1_epi8 (0x2c );
37+ structurals = in.build_bitmask ([&](auto chunk) { // `in` is accessed with `.` everywhere else in this function (in.lo, in.build_bitmask), so use `.` here too; the parameter is named `chunk` so it no longer shadows the outer `in`
38+ __m256i matches = _mm256_cmpeq_epi8 (chunk, mask_open_brace); // local renamed so it does not shadow the captured output `structurals`
39+ matches = _mm256_or_si256 (matches, _mm256_cmpeq_epi8 (chunk, mask_close_brace));
40+ matches = _mm256_or_si256 (matches, _mm256_cmpeq_epi8 (chunk, mask_open_bracket));
41+ matches = _mm256_or_si256 (matches, _mm256_cmpeq_epi8 (chunk, mask_close_bracket));
42+ matches = _mm256_or_si256 (matches, _mm256_cmpeq_epi8 (chunk, mask_column));
43+ matches = _mm256_or_si256 (matches, _mm256_cmpeq_epi8 (chunk, mask_comma));
44+ return matches; // byte lanes equal to any of the six masks ('{', '}', '[', ']', ':', ',') are set; presumably build_bitmask folds these into the 64-bit result -- confirm against simd_input
45+ });
46+
47+ const __m256i mask_space = _mm256_set1_epi8 (0x20 );
48+ const __m256i mask_linefeed = _mm256_set1_epi8 (0x0a );
49+ const __m256i mask_tab = _mm256_set1_epi8 (0x09 );
50+ const __m256i mask_carriage = _mm256_set1_epi8 (0x0d );
51+ whitespace = in.build_bitmask ([&](auto chunk) { // `in` is accessed with `.` everywhere else in this function; the parameter is named `chunk` so it no longer shadows the outer `in`
52+ __m256i space = _mm256_cmpeq_epi8 (chunk, mask_space);
53+ space = _mm256_or_si256 (space, _mm256_cmpeq_epi8 (chunk, mask_linefeed));
54+ space = _mm256_or_si256 (space, _mm256_cmpeq_epi8 (chunk, mask_tab));
55+ return _mm256_or_si256 (space, _mm256_cmpeq_epi8 (chunk, mask_carriage)); // the lambda previously had no return statement, so the combined match vector was discarded; return it like the sibling lambdas do
56+ });
57+ // end of naive approach
6958
7059 #else // SIMDJSON_NAIVE_STRUCTURAL
71- // clang-format off
72- const __m256i structural_table =
73- _mm256_setr_epi8 (44 , 125 , 0 , 0 , 0xc0u , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 58 , 123 ,
74- 44 , 125 , 0 , 0 , 0xc0u , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 58 , 123 );
75- const __m256i white_table = _mm256_setr_epi8 (
76- 32 , 100 , 100 , 100 , 17 , 100 , 113 , 2 , 100 , 9 , 10 , 112 , 100 , 13 , 100 , 100 ,
77- 32 , 100 , 100 , 100 , 17 , 100 , 113 , 2 , 100 , 9 , 10 , 112 , 100 , 13 , 100 , 100 );
78- // clang-format on
79- const __m256i struct_offset = _mm256_set1_epi8 (0xd4u );
80- const __m256i struct_mask = _mm256_set1_epi8 (32 );
81-
82- __m256i lo_white = _mm256_cmpeq_epi8 (in.lo , _mm256_shuffle_epi8 (white_table, in.lo ));
83- __m256i hi_white = _mm256_cmpeq_epi8 (in.hi , _mm256_shuffle_epi8 (white_table, in.hi ));
84- uint64_t ws_res_0 = static_cast <uint32_t >(_mm256_movemask_epi8 (lo_white));
85- uint64_t ws_res_1 = _mm256_movemask_epi8 (hi_white);
86- whitespace = (ws_res_0 | (ws_res_1 << 32 ));
87- __m256i lo_struct_r1 = _mm256_add_epi8 (struct_offset, in.lo );
88- __m256i hi_struct_r1 = _mm256_add_epi8 (struct_offset, in.hi );
89- __m256i lo_struct_r2 = _mm256_or_si256 (in.lo , struct_mask);
90- __m256i hi_struct_r2 = _mm256_or_si256 (in.hi , struct_mask);
91- __m256i lo_struct_r3 = _mm256_shuffle_epi8 (structural_table, lo_struct_r1);
92- __m256i hi_struct_r3 = _mm256_shuffle_epi8 (structural_table, hi_struct_r1);
93- __m256i lo_struct = _mm256_cmpeq_epi8 (lo_struct_r2, lo_struct_r3);
94- __m256i hi_struct = _mm256_cmpeq_epi8 (hi_struct_r2, hi_struct_r3);
95-
96- uint64_t structural_res_0 = static_cast <uint32_t >(_mm256_movemask_epi8 (lo_struct));
97- uint64_t structural_res_1 = _mm256_movemask_epi8 (hi_struct);
98- structurals = (structural_res_0 | (structural_res_1 << 32 ));
60+
61+ // clang-format off
62+ const __m256i structural_table =
63+ _mm256_setr_epi8 (44 , 125 , 0 , 0 , 0xc0u , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 58 , 123 ,
64+ 44 , 125 , 0 , 0 , 0xc0u , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 58 , 123 );
65+ const __m256i white_table = _mm256_setr_epi8 (
66+ 32 , 100 , 100 , 100 , 17 , 100 , 113 , 2 , 100 , 9 , 10 , 112 , 100 , 13 , 100 , 100 ,
67+ 32 , 100 , 100 , 100 , 17 , 100 , 113 , 2 , 100 , 9 , 10 , 112 , 100 , 13 , 100 , 100 );
68+ // clang-format on
69+ const __m256i struct_offset = _mm256_set1_epi8 (0xd4u );
70+ const __m256i struct_mask = _mm256_set1_epi8 (32 );
71+
72+ whitespace = in.build_bitmask ([&](auto chunk) {
73+ return _mm256_cmpeq_epi8 (chunk, _mm256_shuffle_epi8 (white_table, chunk));
74+ });
75+ structurals = in.build_bitmask ([&](auto chunk) {
76+ __m256i struct_r1 = _mm256_add_epi8 (struct_offset, chunk);
77+ __m256i struct_r2 = _mm256_or_si256 (chunk, struct_mask);
78+ __m256i struct_r3 = _mm256_shuffle_epi8 (structural_table, struct_r1);
79+ return _mm256_cmpeq_epi8 (struct_r2, struct_r3);
80+ });
81+
9982 #endif // else SIMDJSON_NAIVE_STRUCTURAL
10083}
10184
0 commit comments