|
| 1 | +namespace stage2 { |
| 2 | + |
| 3 | +struct streaming_structural_parser: structural_parser { |
| 4 | + really_inline streaming_structural_parser(dom_parser_implementation &_parser) : structural_parser(_parser, _parser.next_structural_index) {} |
| 5 | + |
| 6 | + // override to add streaming |
| 7 | + WARN_UNUSED really_inline error_code start(ret_address_t finish_parser) { |
| 8 | + // If there are no structurals left, return EMPTY |
| 9 | + if (structurals.at_end(parser.n_structural_indexes)) { |
| 10 | + return parser.error = EMPTY; |
| 11 | + } |
| 12 | + |
| 13 | + log_start(); |
| 14 | + init(); |
| 15 | + |
| 16 | + // Capacity ain't no thang for streaming, so we don't check it. |
| 17 | + // Advance to the first character as soon as possible |
| 18 | + advance_char(); |
| 19 | + // Push the root scope (there is always at least one scope) |
| 20 | + if (start_document(finish_parser)) { |
| 21 | + return parser.error = DEPTH_ERROR; |
| 22 | + } |
| 23 | + return SUCCESS; |
| 24 | + } |
| 25 | + |
| 26 | + // override to add streaming |
| 27 | + WARN_UNUSED really_inline error_code finish() { |
| 28 | + if ( structurals.past_end(parser.n_structural_indexes) ) { |
| 29 | + log_error("IMPOSSIBLE: past the end of the JSON!"); |
| 30 | + return parser.error = TAPE_ERROR; |
| 31 | + } |
| 32 | + end_document(); |
| 33 | + parser.next_structural_index = uint32_t(structurals.next_structural_index()); |
| 34 | + if (depth != 0) { |
| 35 | + log_error("Unclosed objects or arrays!"); |
| 36 | + return parser.error = TAPE_ERROR; |
| 37 | + } |
| 38 | + if (parser.containing_scope[depth].tape_index != 0) { |
| 39 | + log_error("IMPOSSIBLE: root scope tape index did not start at 0!"); |
| 40 | + return parser.error = TAPE_ERROR; |
| 41 | + } |
| 42 | + return SUCCESS; |
| 43 | + } |
| 44 | +}; |
| 45 | + |
| 46 | +} // namespace stage2 |
| 47 | + |
| 48 | +/************ |
| 49 | + * The JSON is parsed to a tape, see the accompanying tape.md file |
| 50 | + * for documentation. |
| 51 | + ***********/ |
| 52 | +WARN_UNUSED error_code dom_parser_implementation::stage2_next(dom::document &_doc) noexcept { |
| 53 | + this->doc = &_doc; |
| 54 | + static constexpr stage2::unified_machine_addresses addresses = INIT_ADDRESSES(); |
| 55 | + stage2::streaming_structural_parser parser(*this); |
| 56 | + error_code result = parser.start(addresses.finish); |
| 57 | + if (result) { return result; } |
| 58 | + // |
| 59 | + // Read first value |
| 60 | + // |
| 61 | + switch (parser.structurals.current_char()) { |
| 62 | + case '{': |
| 63 | + FAIL_IF( parser.start_object(addresses.finish) ); |
| 64 | + goto object_begin; |
| 65 | + case '[': |
| 66 | + FAIL_IF( parser.start_array(addresses.finish) ); |
| 67 | + goto array_begin; |
| 68 | + case '"': |
| 69 | + FAIL_IF( parser.parse_string() ); |
| 70 | + goto finish; |
| 71 | + case 't': case 'f': case 'n': |
| 72 | + FAIL_IF( parser.parse_single_atom() ); |
| 73 | + goto finish; |
| 74 | + case '0': case '1': case '2': case '3': case '4': |
| 75 | + case '5': case '6': case '7': case '8': case '9': |
| 76 | + FAIL_IF( |
| 77 | + parser.structurals.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) { |
| 78 | + return parser.parse_number(©[idx], false); |
| 79 | + }) |
| 80 | + ); |
| 81 | + goto finish; |
| 82 | + case '-': |
| 83 | + FAIL_IF( |
| 84 | + parser.structurals.with_space_terminated_copy([&](const uint8_t *copy, size_t idx) { |
| 85 | + return parser.parse_number(©[idx], true); |
| 86 | + }) |
| 87 | + ); |
| 88 | + goto finish; |
| 89 | + default: |
| 90 | + parser.log_error("Document starts with a non-value character"); |
| 91 | + goto error; |
| 92 | + } |
| 93 | + |
| 94 | +// |
| 95 | +// Object parser parsers |
| 96 | +// |
| 97 | +object_begin: |
| 98 | + switch (parser.advance_char()) { |
| 99 | + case '"': { |
| 100 | + FAIL_IF( parser.parse_string(true) ); |
| 101 | + goto object_key_parser; |
| 102 | + } |
| 103 | + case '}': |
| 104 | + parser.end_object(); |
| 105 | + goto scope_end; |
| 106 | + default: |
| 107 | + parser.log_error("Object does not start with a key"); |
| 108 | + goto error; |
| 109 | + } |
| 110 | + |
| 111 | +object_key_parser: |
| 112 | + if (parser.advance_char() != ':' ) { parser.log_error("Missing colon after key in object"); goto error; } |
| 113 | + parser.increment_count(); |
| 114 | + parser.advance_char(); |
| 115 | + GOTO( parser.parse_value(addresses, addresses.object_continue) ); |
| 116 | + |
| 117 | +object_continue: |
| 118 | + switch (parser.advance_char()) { |
| 119 | + case ',': |
| 120 | + if (parser.advance_char() != '"' ) { parser.log_error("Key string missing at beginning of field in object"); goto error; } |
| 121 | + FAIL_IF( parser.parse_string(true) ); |
| 122 | + goto object_key_parser; |
| 123 | + case '}': |
| 124 | + parser.end_object(); |
| 125 | + goto scope_end; |
| 126 | + default: |
| 127 | + parser.log_error("No comma between object fields"); |
| 128 | + goto error; |
| 129 | + } |
| 130 | + |
| 131 | +scope_end: |
| 132 | + CONTINUE( parser.parser.ret_address[parser.depth] ); |
| 133 | + |
| 134 | +// |
| 135 | +// Array parser parsers |
| 136 | +// |
| 137 | +array_begin: |
| 138 | + if (parser.advance_char() == ']') { |
| 139 | + parser.end_array(); |
| 140 | + goto scope_end; |
| 141 | + } |
| 142 | + parser.increment_count(); |
| 143 | + |
| 144 | +main_array_switch: |
| 145 | + /* we call update char on all paths in, so we can peek at parser.c on the |
| 146 | + * on paths that can accept a close square brace (post-, and at start) */ |
| 147 | + GOTO( parser.parse_value(addresses, addresses.array_continue) ); |
| 148 | + |
| 149 | +array_continue: |
| 150 | + switch (parser.advance_char()) { |
| 151 | + case ',': |
| 152 | + parser.increment_count(); |
| 153 | + parser.advance_char(); |
| 154 | + goto main_array_switch; |
| 155 | + case ']': |
| 156 | + parser.end_array(); |
| 157 | + goto scope_end; |
| 158 | + default: |
| 159 | + parser.log_error("Missing comma between array values"); |
| 160 | + goto error; |
| 161 | + } |
| 162 | + |
| 163 | +finish: |
| 164 | + return parser.finish(); |
| 165 | + |
| 166 | +error: |
| 167 | + return parser.error(); |
| 168 | +} |
0 commit comments