Skip to content

Commit fe69928

Browse files
authored
Merge pull request simdjson#918 from simdjson/jkeiser/remove-iterator-variables
[3/4] Remove unneeded structural_iterator variables
2 parents 98599e0 + e15e1e2 commit fe69928

File tree

4 files changed

+252
-120
lines changed

4 files changed

+252
-120
lines changed

src/generic/stage2/logger.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,10 @@ namespace logger {
6161
printf(" ");
6262
}
6363
printf("| %c ", printable_char(structurals.at_beginning() ? ' ' : structurals.current_char()));
64-
printf("| %c ", printable_char(structurals.peek_char()));
65-
printf("| %5u ", structurals.structural_indexes[structurals.next_structural]);
64+
printf("| %c ", printable_char(structurals.peek_next_char()));
65+
printf("| %5u ", structurals.parser.structural_indexes[*(structurals.current_structural+1)]);
6666
printf("| %-*s ", LOG_DETAIL_LEN, detail);
67-
printf("| %*zu ", LOG_INDEX_LEN, structurals.idx);
67+
printf("| %*u ", LOG_INDEX_LEN, *structurals.current_structural);
6868
printf("|\n");
6969
}
7070
}
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
namespace stage2 {
2+
3+
// Structural parser used by stage2_next(). The constructor hands the base
// structural_parser the implementation's saved next_structural_index as the
// starting position, and finish() writes a structural index back into
// next_structural_index.
struct streaming_structural_parser: structural_parser {
4+
really_inline streaming_structural_parser(dom_parser_implementation &_parser) : structural_parser(_parser, _parser.next_structural_index) {}
5+
6+
// Streaming version of start().
// Returns EMPTY when the iterator is already at the end of the structural
// indexes, DEPTH_ERROR when start_document() reports failure, and SUCCESS
// otherwise. Every error returned is also stored in parser.error.
// finish_parser is passed straight through to start_document().
7+
WARN_UNUSED really_inline error_code start(ret_address_t finish_parser) {
8+
// If there are no structurals left, return EMPTY
9+
if (structurals.at_end(parser.n_structural_indexes)) {
10+
return parser.error = EMPTY;
11+
}
12+
13+
log_start();
14+
init();
15+
16+
// Capacity ain't no thang for streaming, so we don't check it.
17+
// Advance to the first character as soon as possible
18+
advance_char();
19+
// Push the root scope (there is always at least one scope)
20+
if (start_document(finish_parser)) {
21+
return parser.error = DEPTH_ERROR;
22+
}
23+
return SUCCESS;
24+
}
25+
26+
// Streaming version of finish().
// Returns TAPE_ERROR (also stored in parser.error) when the iterator is past
// the end of the structural indexes, when depth is non-zero after
// end_document(), or when the root scope's tape_index is non-zero.
// Returns SUCCESS otherwise.
27+
WARN_UNUSED really_inline error_code finish() {
28+
if ( structurals.past_end(parser.n_structural_indexes) ) {
29+
log_error("IMPOSSIBLE: past the end of the JSON!");
30+
return parser.error = TAPE_ERROR;
31+
}
32+
end_document();
// Save the position so that a later streaming_structural_parser (which is
// constructed from parser.next_structural_index) starts from here.
// NOTE(review): the structural_iterator.h hunk in this same commit removes
// next_structural_index() from structural_iterator -- confirm this call
// still resolves.
33+
parser.next_structural_index = uint32_t(structurals.next_structural_index());
34+
if (depth != 0) {
35+
log_error("Unclosed objects or arrays!");
36+
return parser.error = TAPE_ERROR;
37+
}
38+
if (parser.containing_scope[depth].tape_index != 0) {
39+
log_error("IMPOSSIBLE: root scope tape index did not start at 0!");
40+
return parser.error = TAPE_ERROR;
41+
}
42+
return SUCCESS;
43+
}
44+
};
45+
46+
} // namespace stage2
47+
48+
/************
49+
* The JSON is parsed to a tape, see the accompanying tape.md file
50+
* for documentation.
51+
***********/
52+
// Runs a streaming_structural_parser over this implementation's structurals,
// starting from next_structural_index, with _doc installed as the target
// document. Implemented as a label/goto state machine.
// Returns the error from parser.start() if it fails; otherwise returns
// parser.finish() on the normal path or parser.error() on the error path.
// NOTE(review): FAIL_IF, GOTO, CONTINUE and INIT_ADDRESSES are macros defined
// outside this file; presumably FAIL_IF jumps to `error` when its argument is
// truthy and GOTO/CONTINUE jump to a label address -- confirm at their definitions.
WARN_UNUSED error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept {
53+
this->doc = &_doc;
54+
static constexpr stage2::unified_machine_addresses addresses = INIT_ADDRESSES();
55+
stage2::streaming_structural_parser parser(*this);
56+
error_code result = parser.start(addresses.finish);
57+
if (result) { return result; }
58+
//
59+
// Read first value
60+
//
// parser.start() has already advanced to the first structural, so dispatch
// on the current character rather than advancing again.
61+
switch (parser.structurals.current_char()) {
62+
case '{':
63+
FAIL_IF( parser.start_object(addresses.finish) );
64+
goto object_begin;
65+
case '[':
66+
FAIL_IF( parser.start_array(addresses.finish) );
67+
goto array_begin;
68+
case '"':
69+
FAIL_IF( parser.parse_string() );
70+
goto finish;
71+
case 't': case 'f': case 'n':
72+
FAIL_IF( parser.parse_single_atom() );
73+
goto finish;
74+
case '0': case '1': case '2': case '3': case '4':
75+
case '5': case '6': case '7': case '8': case '9':
// A root-level number is parsed from the buffer copy supplied by
// with_space_terminated_copy, at the offset it passes in.
// NOTE(review): the boolean passed to parse_number presumably flags a
// leading minus sign (false here, true in the '-' case) -- confirm.
76+
FAIL_IF(
77+
parser.structurals.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) {
78+
return parser.parse_number(&copy[idx], false);
79+
})
80+
);
81+
goto finish;
82+
case '-':
83+
FAIL_IF(
84+
parser.structurals.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) {
85+
return parser.parse_number(&copy[idx], true);
86+
})
87+
);
88+
goto finish;
89+
default:
90+
parser.log_error("Document starts with a non-value character");
91+
goto error;
92+
}
93+
94+
//
95+
// Object parser states
96+
//
// Just after '{': the next structural must be a key string or '}'.
97+
object_begin:
98+
switch (parser.advance_char()) {
99+
case '"': {
100+
FAIL_IF( parser.parse_string(true) );
101+
goto object_key_parser;
102+
}
103+
case '}':
104+
parser.end_object();
105+
goto scope_end;
106+
default:
107+
parser.log_error("Object does not start with a key");
108+
goto error;
109+
}
110+
// Just after a key string: require ':', count the field, advance to the
// next structural and parse the value; object_continue is passed as the
// continuation address.
111+
object_key_parser:
112+
if (parser.advance_char() != ':' ) { parser.log_error("Missing colon after key in object"); goto error; }
113+
parser.increment_count();
114+
parser.advance_char();
115+
GOTO( parser.parse_value(addresses, addresses.object_continue) );
116+
// After a field's value: ',' must be followed by another key string, '}'
// closes the object, anything else is an error.
117+
object_continue:
118+
switch (parser.advance_char()) {
119+
case ',':
120+
if (parser.advance_char() != '"' ) { parser.log_error("Key string missing at beginning of field in object"); goto error; }
121+
FAIL_IF( parser.parse_string(true) );
122+
goto object_key_parser;
123+
case '}':
124+
parser.end_object();
125+
goto scope_end;
126+
default:
127+
parser.log_error("No comma between object fields");
128+
goto error;
129+
}
130+
// A scope was just closed: continue at the address recorded in
// ret_address for the current depth.
131+
scope_end:
132+
CONTINUE( parser.parser.ret_address[parser.depth] );
133+
134+
//
135+
// Array parser states
136+
//
// Just after '[': either an immediate ']' (empty array) or the first value.
137+
array_begin:
138+
if (parser.advance_char() == ']') {
139+
parser.end_array();
140+
goto scope_end;
141+
}
142+
parser.increment_count();
143+
144+
main_array_switch:
145+
/* Every path into this label (array_begin falling through, and the ','
146+
* case of array_continue) has already called advance_char(), so the
* current structural is the one that follows '[' or ','. */
147+
GOTO( parser.parse_value(addresses, addresses.array_continue) );
148+
// After an array element: ',' counts and advances to the next element,
// ']' closes the array, anything else is an error.
149+
array_continue:
150+
switch (parser.advance_char()) {
151+
case ',':
152+
parser.increment_count();
153+
parser.advance_char();
154+
goto main_array_switch;
155+
case ']':
156+
parser.end_array();
157+
goto scope_end;
158+
default:
159+
parser.log_error("Missing comma between array values");
160+
goto error;
161+
}
162+
// Normal exit: the result is whatever parser.finish() returns.
163+
finish:
164+
return parser.finish();
165+
// Error exit: the result is whatever parser.error() returns.
166+
error:
167+
return parser.error();
168+
}

src/generic/stage2/structural_iterator.h

Lines changed: 29 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,29 +2,34 @@ namespace stage2 {
22

33
class structural_iterator {
44
public:
5-
really_inline structural_iterator(const uint8_t* _buf, size_t _len, const uint32_t *_structural_indexes, size_t next_structural_index)
6-
: buf{_buf},
7-
len{_len},
8-
structural_indexes{_structural_indexes},
9-
next_structural{next_structural_index}
10-
{}
11-
really_inline char advance_char() {
12-
idx = structural_indexes[next_structural];
13-
next_structural++;
14-
c = *current();
15-
return c;
5+
const uint8_t* const buf;
6+
uint32_t *current_structural;
7+
dom_parser_implementation &parser;
8+
9+
// Start a structural
10+
really_inline structural_iterator(dom_parser_implementation &_parser, size_t start_structural_index)
11+
: buf{_parser.buf},
12+
current_structural{&_parser.structural_indexes[start_structural_index]},
13+
parser{_parser} {
14+
}
15+
// Get the buffer position of the current structural character
16+
really_inline const uint8_t* current() {
17+
return &buf[*current_structural];
1618
}
19+
// Get the current structural character
1720
really_inline char current_char() {
18-
return c;
21+
return buf[*current_structural];
1922
}
20-
really_inline char peek_char() {
21-
return buf[structural_indexes[next_structural]];
23+
// Get the next structural character without advancing
24+
really_inline char peek_next_char() {
25+
return buf[*(current_structural+1)];
2226
}
23-
really_inline const uint8_t* current() {
24-
return &buf[idx];
27+
really_inline char advance_char() {
28+
current_structural++;
29+
return buf[*current_structural];
2530
}
2631
really_inline size_t remaining_len() {
27-
return len - idx;
32+
return parser.len - *current_structural;
2833
}
2934
template<typename F>
3035
really_inline bool with_space_terminated_copy(const F& f) {
@@ -41,35 +46,25 @@ class structural_iterator {
4146
* practice unless you are in the strange scenario where you have many JSON
4247
* documents made of single atoms.
4348
*/
44-
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));
49+
char *copy = static_cast<char *>(malloc(parser.len + SIMDJSON_PADDING));
4550
if (copy == nullptr) {
4651
return true;
4752
}
48-
memcpy(copy, buf, len);
49-
memset(copy + len, ' ', SIMDJSON_PADDING);
50-
bool result = f(reinterpret_cast<const uint8_t*>(copy), idx);
53+
memcpy(copy, buf, parser.len);
54+
memset(copy + parser.len, ' ', SIMDJSON_PADDING);
55+
bool result = f(reinterpret_cast<const uint8_t*>(copy), *current_structural);
5156
free(copy);
5257
return result;
5358
}
5459
really_inline bool past_end(uint32_t n_structural_indexes) {
55-
return next_structural > n_structural_indexes;
60+
return current_structural >= &parser.structural_indexes[n_structural_indexes];
5661
}
5762
really_inline bool at_end(uint32_t n_structural_indexes) {
58-
return next_structural == n_structural_indexes;
63+
return current_structural == &parser.structural_indexes[n_structural_indexes];
5964
}
6065
really_inline bool at_beginning() {
61-
return next_structural == 0;
62-
}
63-
really_inline size_t next_structural_index() {
64-
return next_structural;
66+
return current_structural == parser.structural_indexes.get();
6567
}
66-
67-
const uint8_t* const buf;
68-
const size_t len;
69-
const uint32_t* const structural_indexes;
70-
size_t next_structural; // next structural index
71-
size_t idx{0}; // location of the structural character in the input (buf)
72-
uint8_t c{0}; // used to track the (structural) character we are looking at
7368
};
7469

7570
} // namespace stage2

0 commit comments

Comments
 (0)