Skip to content

Commit 1aab475

Browse files
committed
Store all parser state in the implementation
1 parent 86f8a4a commit 1aab475

25 files changed

+329
-20675
lines changed

benchmark/benchmarker.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ struct json_stats {
8484
bytes = json.size();
8585
blocks = bytes / BYTES_PER_BLOCK;
8686
if (bytes % BYTES_PER_BLOCK > 0) { blocks++; } // Account for remainder block
87-
structurals = parser.n_structural_indexes-1;
87+
structurals = parser.implementation->n_structural_indexes-1;
8888

8989
// Calculate stats on blocks that will trigger utf-8 if statements / mispredictions
9090
bool last_block_has_utf8 = false;
@@ -141,7 +141,7 @@ struct json_stats {
141141
for (size_t block=0; block<blocks; block++) {
142142
// Count structurals in the block
143143
int block_structurals=0;
144-
while (structural < parser.n_structural_indexes && parser.structural_indexes[structural] < (block+1)*BYTES_PER_BLOCK) {
144+
while (structural < parser.implementation->n_structural_indexes && parser.implementation->structural_indexes[structural] < (block+1)*BYTES_PER_BLOCK) {
145145
block_structurals++;
146146
structural++;
147147
}
@@ -320,7 +320,7 @@ struct benchmarker {
320320

321321
// Stage 1 (find structurals)
322322
collector.start();
323-
error = parser.implementation->stage1((const uint8_t *)json.data(), json.size(), parser, false);
323+
error = parser.implementation->stage1((const uint8_t *)json.data(), json.size(), false);
324324
event_count stage1_count = collector.end();
325325
stage1 << stage1_count;
326326
if (error) {
@@ -334,7 +334,7 @@ struct benchmarker {
334334
} else {
335335
event_count stage2_count;
336336
collector.start();
337-
error = parser.implementation->stage2(parser);
337+
error = parser.implementation->stage2(parser.doc);
338338
if (error) {
339339
exit_error(string("Failed to parse ") + filename + " during stage 2 parsing " + error_message(error));
340340
}
@@ -345,7 +345,7 @@ struct benchmarker {
345345
// Calculate stats the first time we parse
346346
if (stats == NULL) {
347347
if (stage1_only) { // we need stage 2 once
348-
error = parser.implementation->stage2(parser);
348+
error = parser.implementation->stage2(parser.doc);
349349
if (error) {
350350
printf("Warning: failed to parse during stage 2. Unable to acquire statistics.\n");
351351
}

benchmark/statisticalmodel.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ stat_t simdjson_compute_stats(const simdjson::padded_string &p) {
106106
answer.non_ascii_byte_count = count_nonasciibytes(
107107
reinterpret_cast<const uint8_t *>(p.data()), p.size());
108108
answer.byte_count = p.size();
109-
answer.structural_indexes_count = parser.n_structural_indexes;
109+
answer.structural_indexes_count = parser.implementation->n_structural_indexes;
110110
simdjson_recurse(answer, doc);
111111
return answer;
112112
}
@@ -163,7 +163,6 @@ int main(int argc, char *argv[]) {
163163
s.true_count, s.false_count, s.byte_count, s.structural_indexes_count);
164164
#ifdef __linux__
165165
simdjson::dom::parser parser;
166-
const simdjson::implementation &stage_parser = *simdjson::active_implementation;
167166
simdjson::error_code alloc_error = parser.allocate(p.size());
168167
if (alloc_error) {
169168
std::cerr << alloc_error << std::endl;
@@ -181,14 +180,14 @@ int main(int argc, char *argv[]) {
181180
for (uint32_t i = 0; i < iterations; i++) {
182181
unified.start();
183182
// The default template is simdjson::architecture::NATIVE.
184-
bool isok = (stage_parser.stage1((const uint8_t *)p.data(), p.size(), parser, false) == simdjson::SUCCESS);
183+
bool isok = (parser.implementation->stage1((const uint8_t *)p.data(), p.size(), false) == simdjson::SUCCESS);
185184
unified.end(results);
186185

187186
cy1 += results[0];
188187
cl1 += results[1];
189188

190189
unified.start();
191-
isok = isok && (stage_parser.stage2((const uint8_t *)p.data(), p.size(), parser) == simdjson::SUCCESS);
190+
isok = isok && (parser.implementation->stage2(parser.doc) == simdjson::SUCCESS);
192191
unified.end(results);
193192

194193
cy2 += results[0];

include/simdjson/dom/parser.h

Lines changed: 2 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -15,22 +15,6 @@
1515

1616
namespace simdjson {
1717

18-
namespace internal {
19-
20-
// expectation: sizeof(scope_descriptor) = 64/8.
21-
struct scope_descriptor {
22-
uint32_t tape_index; // where, on the tape, does the scope ([,{) begins
23-
uint32_t count; // how many elements in the scope
24-
}; // struct scope_descriptor
25-
26-
#ifdef SIMDJSON_USE_COMPUTED_GOTO
27-
typedef void* ret_address;
28-
#else
29-
typedef char ret_address;
30-
#endif
31-
32-
} // namespace internal
33-
3418
namespace dom {
3519

3620
class document_stream;
@@ -68,14 +52,14 @@ class parser {
6852
*
6953
* @param other The parser to take. Its capacity is zeroed.
7054
*/
71-
parser(parser &&other) = default;
55+
really_inline parser(parser &&other) noexcept;
7256
parser(const parser &) = delete; ///< @private Disallow copying
7357
/**
7458
* Take another parser's buffers and state.
7559
*
7660
* @param other The parser to take. Its capacity is zeroed.
7761
*/
78-
parser &operator=(parser &&other) = default;
62+
really_inline parser &operator=(parser &&other) noexcept;
7963
parser &operator=(const parser &) = delete; ///< @private Disallow copying
8064

8165
/** Deallocate the JSON parser. */
@@ -352,21 +336,6 @@ class parser {
352336
/** @private [for benchmarking access] The implementation to use */
353337
std::unique_ptr<internal::dom_parser_implementation> implementation{};
354338

355-
public:
356-
/** @private Next location to write to in the tape */
357-
uint32_t current_loc{0};
358-
359-
/** @private Number of structural indices passed from stage 1 to stage 2 */
360-
uint32_t n_structural_indexes{0};
361-
/** @private Structural indices passed from stage 1 to stage 2 */
362-
std::unique_ptr<uint32_t[]> structural_indexes{};
363-
364-
/** @private Tape location of each open { or [ */
365-
std::unique_ptr<internal::scope_descriptor[]> containing_scope{};
366-
367-
/** @private Return address of each open { or [ */
368-
std::unique_ptr<internal::ret_address[]> ret_address{};
369-
370339
/** @private Use `if (parser.parse(...).error())` instead */
371340
bool valid{false};
372341
/** @private Use `parser.parse(...).error()` instead */
@@ -405,20 +374,6 @@ class parser {
405374
*/
406375
size_t _max_capacity;
407376

408-
/**
409-
* The maximum document length this parser supports.
410-
*
411-
* Buffers are large enough to handle any document up to this length.
412-
*/
413-
size_t _capacity{0};
414-
415-
/**
416-
* The maximum depth (number of nested objects and arrays) supported by this parser.
417-
*
418-
* Defaults to DEFAULT_MAX_DEPTH.
419-
*/
420-
size_t _max_depth{0};
421-
422377
/**
423378
* The loaded buffer (reused each time load() is called)
424379
*/

include/simdjson/implementation.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
namespace simdjson {
1212

1313
namespace dom {
14-
class parser;
14+
class document;
1515
} // namespace dom
1616

1717
/**

include/simdjson/inline/document_stream.h

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -34,18 +34,18 @@ namespace internal {
3434
* */
3535
inline uint32_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const dom::parser &parser) {
3636
// this function can be generally useful
37-
if (parser.n_structural_indexes == 0)
37+
if (parser.implementation->n_structural_indexes == 0)
3838
return 0;
39-
auto last_i = parser.n_structural_indexes - 1;
40-
if (parser.structural_indexes[last_i] == size) {
39+
auto last_i = parser.implementation->n_structural_indexes - 1;
40+
if (parser.implementation->structural_indexes[last_i] == size) {
4141
if (last_i == 0)
4242
return 0;
43-
last_i = parser.n_structural_indexes - 2;
43+
last_i = parser.implementation->n_structural_indexes - 2;
4444
}
4545
auto arr_cnt = 0;
4646
auto obj_cnt = 0;
4747
for (auto i = last_i; i > 0; i--) {
48-
auto idxb = parser.structural_indexes[i];
48+
auto idxb = parser.implementation->structural_indexes[i];
4949
switch (buf[idxb]) {
5050
case ':':
5151
case ',':
@@ -63,7 +63,7 @@ inline uint32_t find_last_json_buf_idx(const uint8_t *buf, size_t size, const do
6363
arr_cnt++;
6464
break;
6565
}
66-
auto idxa = parser.structural_indexes[i - 1];
66+
auto idxa = parser.implementation->structural_indexes[i - 1];
6767
switch (buf[idxa]) {
6868
case '{':
6969
case '[':
@@ -172,17 +172,17 @@ inline error_code document_stream::json_parse() noexcept {
172172
if (_batch_size == 0) {
173173
return simdjson::UTF8_ERROR;
174174
}
175-
auto stage1_is_ok = error_code(parser.implementation->stage1(buf(), _batch_size, parser, true));
175+
auto stage1_is_ok = error_code(parser.implementation->stage1(buf(), _batch_size, true));
176176
if (stage1_is_ok != simdjson::SUCCESS) {
177177
return stage1_is_ok;
178178
}
179179
uint32_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
180180
if (last_index == 0) {
181-
if (parser.n_structural_indexes == 0) {
181+
if (parser.implementation->n_structural_indexes == 0) {
182182
return simdjson::EMPTY;
183183
}
184184
} else {
185-
parser.n_structural_indexes = last_index + 1;
185+
parser.implementation->n_structural_indexes = last_index + 1;
186186
}
187187
}
188188
// the second thread is running or done.
@@ -191,15 +191,15 @@ inline error_code document_stream::json_parse() noexcept {
191191
if (stage1_is_ok_thread != simdjson::SUCCESS) {
192192
return stage1_is_ok_thread;
193193
}
194-
std::swap(parser.structural_indexes, parser_thread.structural_indexes);
195-
parser.n_structural_indexes = parser_thread.n_structural_indexes;
194+
std::swap(parser.implementation->structural_indexes, parser_thread.implementation->structural_indexes);
195+
parser.implementation->n_structural_indexes = parser_thread.implementation->n_structural_indexes;
196196
advance(last_json_buffer_loc);
197197
n_bytes_parsed += last_json_buffer_loc;
198198
}
199199
// let us decide whether we will start a new thread
200200
if (remaining() - _batch_size > 0) {
201201
last_json_buffer_loc =
202-
parser.structural_indexes[internal::find_last_json_buf_idx(buf(), _batch_size, parser)];
202+
parser.implementation->structural_indexes[internal::find_last_json_buf_idx(buf(), _batch_size, parser)];
203203
_batch_size = (std::min)(_batch_size, remaining() - last_json_buffer_loc);
204204
if (_batch_size > 0) {
205205
_batch_size = internal::trimmed_length_safe_utf8(
@@ -214,22 +214,22 @@ inline error_code document_stream::json_parse() noexcept {
214214
// this->stage1_is_ok_thread
215215
// there is only one thread that may write to this value
216216
stage_1_thread = std::thread([this, b, bs] {
217-
this->stage1_is_ok_thread = error_code(parser_thread.implementation->stage1(b, bs, this->parser_thread, true));
217+
this->stage1_is_ok_thread = error_code(parser_thread.implementation->stage1(b, bs, true));
218218
});
219219
}
220220
}
221221
next_json = 0;
222222
load_next_batch = false;
223223
} // load_next_batch
224-
error_code res = parser.implementation->stage2(buf(), remaining(), parser, next_json);
224+
error_code res = parser.implementation->stage2(buf(), remaining(), parser.doc, next_json);
225225
if (res == simdjson::SUCCESS_AND_HAS_MORE) {
226226
n_parsed_docs++;
227-
current_buffer_loc = parser.structural_indexes[next_json];
227+
current_buffer_loc = parser.implementation->structural_indexes[next_json];
228228
load_next_batch = (current_buffer_loc == last_json_buffer_loc);
229229
} else if (res == simdjson::SUCCESS) {
230230
n_parsed_docs++;
231231
if (remaining() > _batch_size) {
232-
current_buffer_loc = parser.structural_indexes[next_json - 1];
232+
current_buffer_loc = parser.implementation->structural_indexes[next_json - 1];
233233
load_next_batch = true;
234234
res = simdjson::SUCCESS_AND_HAS_MORE;
235235
}
@@ -249,28 +249,28 @@ inline error_code document_stream::json_parse() noexcept {
249249
n_bytes_parsed += current_buffer_loc;
250250
_batch_size = (std::min)(_batch_size, remaining());
251251
_batch_size = internal::trimmed_length_safe_utf8((const char *)buf(), _batch_size);
252-
auto stage1_is_ok = (error_code)parser.implementation->stage1(buf(), _batch_size, parser, true);
252+
auto stage1_is_ok = (error_code)parser.implementation->stage1(buf(), _batch_size, true);
253253
if (stage1_is_ok != simdjson::SUCCESS) {
254254
return stage1_is_ok;
255255
}
256256
uint32_t last_index = internal::find_last_json_buf_idx(buf(), _batch_size, parser);
257257
if (last_index == 0) {
258-
if (parser.n_structural_indexes == 0) {
258+
if (parser.implementation->n_structural_indexes == 0) {
259259
return EMPTY;
260260
}
261261
} else {
262-
parser.n_structural_indexes = last_index + 1;
262+
parser.implementation->n_structural_indexes = last_index + 1;
263263
}
264264
load_next_batch = false;
265265
} // load_next_batch
266-
error_code res = parser.implementation->stage2(buf(), remaining(), parser, next_json);
266+
error_code res = parser.implementation->stage2(buf(), remaining(), parser.doc, next_json);
267267
if (likely(res == simdjson::SUCCESS_AND_HAS_MORE)) {
268268
n_parsed_docs++;
269-
current_buffer_loc = parser.structural_indexes[next_json];
269+
current_buffer_loc = parser.implementation->structural_indexes[next_json];
270270
} else if (res == simdjson::SUCCESS) {
271271
n_parsed_docs++;
272272
if (remaining() > _batch_size) {
273-
current_buffer_loc = parser.structural_indexes[next_json - 1];
273+
current_buffer_loc = parser.implementation->structural_indexes[next_json - 1];
274274
next_json = 1;
275275
load_next_batch = true;
276276
res = simdjson::SUCCESS_AND_HAS_MORE;

0 commit comments

Comments
 (0)