forked from simdjson/simdjson
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocument.h
More file actions
1341 lines (1215 loc) · 48.2 KB
/
document.h
File metadata and controls
1341 lines (1215 loc) · 48.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#ifndef SIMDJSON_DOCUMENT_H
#define SIMDJSON_DOCUMENT_H
#include <cstring>
#include <memory>
#include <string>
#include <limits>
#include <sstream>
#include "simdjson/common_defs.h"
#include "simdjson/simdjson.h"
#include "simdjson/padded_string.h"
namespace simdjson {
namespace dom {
// Forward declarations of the DOM types used throughout this header.
class array;
class document;
class document_stream;
class element;
class key_value_pair;
class object;
class parser;
/**
 * Batch size that parser.parse_many() and parser.load_many() fall back to
 * when the caller does not supply one.
 */
static constexpr size_t DEFAULT_BATCH_SIZE{1000000};
} // namespace dom
// Declare the simdjson_result specializations for the DOM types; their definitions are not in
// the visible part of this file.
template<> struct simdjson_result<dom::element>;
template<> struct simdjson_result<dom::array>;
template<> struct simdjson_result<dom::object>;
// Forward declaration of minify<T>, which the dom classes declared below name as a friend.
template<typename T>
class minify;
namespace internal {
// Make the dom:: names visible inside namespace internal: tape_ref below refers to `document`
// without qualification.
using namespace simdjson::dom;
/**
 * Mask keeping the low 56 bits of a 64-bit word (the top byte is cleared).
 * NOTE(review): presumably isolates the payload of a tape entry; confirm against the tape code.
 */
constexpr uint64_t JSON_VALUE_MASK = (uint64_t{1} << 56) - 1;
/**
 * Mask keeping the low 24 bits of a 32-bit word; 0xFFFFFF is also the saturation value
 * documented for array::size() and object::size().
 */
constexpr uint32_t JSON_COUNT_MASK = (uint32_t{1} << 24) - 1;
/**
 * Tags identifying the kind of each entry on the tape.
 *
 * Every tag's value is the ASCII character written next to it.
 */
enum class tape_type {
  // Document root
  ROOT = 'r',
  // Container delimiters
  START_OBJECT = '{',
  END_OBJECT = '}',
  START_ARRAY = '[',
  END_ARRAY = ']',
  // Strings and numbers
  STRING = '"',
  DOUBLE = 'd',
  INT64 = 'l',
  UINT64 = 'u',
  // Literals
  TRUE_VALUE = 't',
  FALSE_VALUE = 'f',
  NULL_VALUE = 'n'
};
/**
* A reference to an element on the tape. Internal only.
*
* Stored as a (document pointer, tape index) pair. dom::array, dom::object, dom::element and the
* array/object iterators all derive from this class with protected inheritance.
*/
class tape_ref {
public:
/** Default constructor; the definition is not in this part of the file. */
really_inline tape_ref() noexcept;
/** Create a reference to tape index `json_index` of document `doc`. */
really_inline tape_ref(const document *doc, size_t json_index) noexcept;
// NOTE(review): presumably the tape index just past this element (past the whole container for
// arrays and objects) -- confirm against the implementation.
inline size_t after_element() const noexcept;
/** The tape_type tag of the referenced tape entry. */
really_inline tape_type tape_ref_type() const noexcept;
// NOTE(review): presumably the entry's payload, i.e. the tape word without its type tag (see
// JSON_VALUE_MASK) -- confirm against the implementation.
really_inline uint64_t tape_value() const noexcept;
// Type predicates on the referenced tape entry.
really_inline bool is_double() const noexcept;
really_inline bool is_int64() const noexcept;
really_inline bool is_uint64() const noexcept;
really_inline bool is_false() const noexcept;
really_inline bool is_true() const noexcept;
really_inline bool is_null_on_tape() const noexcept;// different name to avoid clash with is_null.
// Accessors for container entries. NOTE(review): presumably decoded from the start-of-scope tape
// word (matching close position and element count, see JSON_COUNT_MASK) -- confirm.
really_inline uint32_t matching_brace_index() const noexcept;
really_inline uint32_t scope_count() const noexcept;
// NOTE(review): presumably reads the tape word following this entry as a T -- confirm.
template<typename T>
really_inline T next_tape_value() const noexcept;
/** The referenced string as a std::string_view. */
inline std::string_view get_string_view() const noexcept;
/** The document this element references. */
const document *doc;
/** The index of this element on `doc.tape[]` */
size_t json_index;
};
} // namespace internal
namespace dom {
/**
 * Concrete type of a JSON element.
 *
 * This is the type the element converts to most directly through get<>().
 * Every enumerator's value is the ASCII character written next to it.
 */
enum class element_type {
  // Containers
  OBJECT = '{',     ///< dom::object
  ARRAY = '[',      ///< dom::array
  // Strings and numbers
  STRING = '"',     ///< std::string_view
  DOUBLE = 'd',     ///< double: any number with a "." or "e" that fits in double.
  UINT64 = 'u',     ///< uint64_t: any integer that fits in uint64_t but *not* int64_t
  INT64 = 'l',      ///< int64_t
  // Literals
  BOOL = 't',       ///< bool
  NULL_VALUE = 'n'  ///< null
};
/**
* JSON array.
*
* Offers iteration (begin()/end()), size() and lookup by index or by JSON pointer. It derives
* (protected) from internal::tape_ref, so an array is a reference into a document's tape rather
* than a container that owns its elements.
*/
class array : protected internal::tape_ref {
public:
/** Create a new, invalid array */
really_inline array() noexcept;
/**
* Iterator over the elements of an array.
*
* Declares `*`, prefix `++` and `!=`, which is what a range-based for loop needs. Instances are
* created by array (the constructor is private and array is a friend).
*/
class iterator : protected internal::tape_ref {
public:
/**
* Get the actual value
*/
inline element operator*() const noexcept;
/**
* Get the next value.
*
* Part of the iterator interface.
*/
inline iterator& operator++() noexcept;
/**
* Check if these values come from the same place in the JSON.
*
* Part of the iterator interface.
*/
inline bool operator!=(const iterator& other) const noexcept;
private:
/** Create an iterator at tape index `json_index` of `doc`. */
really_inline iterator(const document *doc, size_t json_index) noexcept;
friend class array;
};
/**
* Return the first array element.
*
* Together with end(), lets the array be used in a range-based for loop.
*/
inline iterator begin() const noexcept;
/**
* One past the last array element.
*
* Together with begin(), lets the array be used in a range-based for loop.
*/
inline iterator end() const noexcept;
/**
* Get the size of the array (number of immediate children).
* It is a saturated value with a maximum of 0xFFFFFF: if the value
* is 0xFFFFFF then the size is 0xFFFFFF or greater.
*/
inline size_t size() const noexcept;
/**
* Get the value associated with the given JSON pointer.
*
* dom::parser parser;
* array a = parser.parse(R"([ { "foo": { "a": [ 10, 20, 30 ] }} ])");
* a.at("0/foo/a/1") == 20
* a.at("0")["foo"]["a"].at(1) == 20
*
* @param json_pointer The JSON pointer to resolve, starting from this array.
* @return The value associated with the given JSON pointer, or:
* - NO_SUCH_FIELD if a field does not exist in an object
* - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length
* - INCORRECT_TYPE if a non-integer is used to access an array
* - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed
*/
inline simdjson_result<element> at(const std::string_view &json_pointer) const noexcept;
/**
* Get the value at the given index.
*
* @param index The position of the wanted element (the examples in this header use at(1) for the
* second element).
* @return The value at the given index, or:
* - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length
*/
inline simdjson_result<element> at(size_t index) const noexcept;
private:
/** Create an array referencing tape index `json_index` of `doc`; reserved for the friends below. */
really_inline array(const document *doc, size_t json_index) noexcept;
friend class element;
friend struct simdjson_result<element>;
template<typename T>
friend class simdjson::minify;
};
/**
* JSON object.
*
* Offers iteration over key/value pairs (begin()/end()), size() and lookup by key or by JSON
* pointer. It derives (protected) from internal::tape_ref, so an object is a reference into a
* document's tape rather than a container that owns its fields.
*/
class object : protected internal::tape_ref {
public:
/** Create a new, invalid object */
really_inline object() noexcept;
/**
* Iterator over the key/value pairs of an object.
*
* Declares `*`, prefix `++` and `!=`, which is what a range-based for loop needs, plus direct
* accessors for the current key and value. Instances are created by object (the constructor is
* private and object is a friend).
*/
class iterator : protected internal::tape_ref {
public:
/**
* Get the actual key/value pair
*/
inline const key_value_pair operator*() const noexcept;
/**
* Get the next key/value pair.
*
* Part of the iterator interface.
*/
inline iterator& operator++() noexcept;
/**
* Check if these key value pairs come from the same place in the JSON.
*
* Part of the iterator interface.
*/
inline bool operator!=(const iterator& other) const noexcept;
/**
* Get the key of this key/value pair.
*/
inline std::string_view key() const noexcept;
/**
* Get the key of this key/value pair as a C string.
*
* NOTE(review): presumably null-terminated, as the name suggests -- confirm against the
* implementation.
*/
inline const char *key_c_str() const noexcept;
/**
* Get the value of this key/value pair.
*/
inline element value() const noexcept;
private:
/** Create an iterator at tape index `json_index` of `doc`. */
really_inline iterator(const document *doc, size_t json_index) noexcept;
friend class object;
};
/**
* Return the first key/value pair.
*
* Together with end(), lets the object be used in a range-based for loop.
*/
inline iterator begin() const noexcept;
/**
* One past the last key/value pair.
*
* Together with begin(), lets the object be used in a range-based for loop.
*/
inline iterator end() const noexcept;
/**
* Get the size of the object (number of keys).
* It is a saturated value with a maximum of 0xFFFFFF: if the value
* is 0xFFFFFF then the size is 0xFFFFFF or greater.
*/
inline size_t size() const noexcept;
/**
* Get the value associated with the given key.
*
* The key will be matched against **unescaped** JSON:
*
* dom::parser parser;
* parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1
* parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD
*
* @param key The key to look up.
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
* - INCORRECT_TYPE if this is not an object
*/
inline simdjson_result<element> operator[](const std::string_view &key) const noexcept;
/**
* Get the value associated with the given key.
*
* The key will be matched against **unescaped** JSON:
*
* dom::parser parser;
* parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1
* parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD
*
* @param key The key to look up, as a C string.
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
* - INCORRECT_TYPE if this is not an object
*/
inline simdjson_result<element> operator[](const char *key) const noexcept;
/**
* Get the value associated with the given JSON pointer.
*
* dom::parser parser;
* object obj = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})");
* obj.at("foo/a/1") == 20
* obj.at("foo")["a"].at(1) == 20
*
* @param json_pointer The JSON pointer to resolve, starting from this object.
* @return The value associated with the given JSON pointer, or:
* - NO_SUCH_FIELD if a field does not exist in an object
* - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length
* - INCORRECT_TYPE if a non-integer is used to access an array
* - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed
*/
inline simdjson_result<element> at(const std::string_view &json_pointer) const noexcept;
/**
* Get the value associated with the given key.
*
* The key will be matched against **unescaped** JSON:
*
* dom::parser parser;
* parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1
* parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD
*
* @param key The key to look up.
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
*/
inline simdjson_result<element> at_key(const std::string_view &key) const noexcept;
/**
* Get the value associated with the given key in a case-insensitive manner.
* It is only guaranteed to work over ASCII inputs.
*
* Note: The key will be matched against **unescaped** JSON.
*
* @param key The key to look up, compared without regard to case.
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
*/
inline simdjson_result<element> at_key_case_insensitive(const std::string_view &key) const noexcept;
private:
/** Create an object referencing tape index `json_index` of `doc`; reserved for the friends below. */
really_inline object(const document *doc, size_t json_index) noexcept;
friend class element;
friend struct simdjson_result<element>;
template<typename T>
friend class simdjson::minify;
};
/**
* A parsed JSON document.
*
* Owns the two buffers that hold the parsed form: `tape` and `string_buf`.
*
* This class cannot be copied, only moved, to avoid unintended allocations.
*/
class document {
public:
/**
* Create a document container with zero capacity.
*
* The parser will allocate capacity as needed.
*/
document() noexcept = default;
~document() noexcept = default;
/**
* Take another document's buffers.
*
* Defaulted: the `tape` and `string_buf` pointers are moved out of `other`.
*
* @param other The document to take. Its capacity is zeroed and it is invalidated.
*/
document(document &&other) noexcept = default;
/** @private */
document(const document &) = delete; // Disallow copying
/**
* Take another document's buffers.
*
* Defaulted: the `tape` and `string_buf` pointers are moved out of `other`.
*
* @param other The document to take. Its capacity is zeroed.
*/
document &operator=(document &&other) noexcept = default;
/** @private */
document &operator=(const document &) = delete; // Disallow copying
/**
* Get the root element of this document.
*
* @return The root as a dom::element.
*/
element root() const noexcept;
/**
* @private Dump the raw tape for debugging.
*
* @param os the stream to output to.
* @return false if the tape is likely wrong (e.g., you did not parse a valid JSON).
*/
bool dump_raw_tape(std::ostream &os) const noexcept;
/** @private Structural values. */
std::unique_ptr<uint64_t[]> tape{};
/** @private String values.
*
* Should be at least byte_capacity.
*/
std::unique_ptr<uint8_t[]> string_buf{};
private:
/**
* Allocate the buffers for a document of `len` bytes. Private; parser is a friend.
*
* @param len The capacity to allocate for.
* @return An error_code describing the outcome.
*/
inline error_code allocate(size_t len) noexcept;
template<typename T>
friend class simdjson::minify;
friend class parser;
}; // class document
/**
* A JSON element.
*
* References an element in a JSON document, representing a JSON null, boolean, string, number,
* array or object.
*
* Two access styles are declared: noexcept accessors returning simdjson_result (get<T>(), at(),
* operator[], ...) and, only when SIMDJSON_EXCEPTIONS is enabled, conversion operators and
* begin()/end() that are declared noexcept(false).
*/
class element : protected internal::tape_ref {
public:
/** Create a new, invalid element. */
really_inline element() noexcept;
/** The type of this element. */
really_inline element_type type() const noexcept;
/** Whether this element is a json `null`. */
really_inline bool is_null() const noexcept;
/**
* Tell whether the value can be cast to provided type (T).
*
* Supported types:
* - Boolean: bool
* - Number: double, uint64_t, int64_t
* - String: std::string_view, const char *
* - Array: dom::array
* - Object: dom::object
*
* @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object
* @return true if the value can be cast to T, false otherwise.
*/
template<typename T>
really_inline bool is() const noexcept;
/**
* Get the value as the provided type (T).
*
* Supported types:
* - Boolean: bool
* - Number: double, uint64_t, int64_t
* - String: std::string_view, const char *
* - Array: dom::array
* - Object: dom::object
*
* @tparam T bool, double, uint64_t, int64_t, std::string_view, const char *, dom::array, dom::object
*
* @returns The value cast to the given type, or:
* INCORRECT_TYPE if the value cannot be cast to the given type.
*/
template<typename T>
really_inline simdjson_result<T> get() const noexcept;
#if SIMDJSON_EXCEPTIONS
/**
* Read this element as a boolean.
*
* @return The boolean value
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a boolean.
*/
inline operator bool() const noexcept(false);
/**
* Read this element as a null-terminated string.
*
* Does *not* convert other types to a string; requires that the JSON type of the element was
* an actual string.
*
* Unlike the other conversions, this one is declared `explicit`.
*
* @return The string value.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string.
*/
inline explicit operator const char*() const noexcept(false);
/**
* Read this element as a null-terminated string.
*
* Does *not* convert other types to a string; requires that the JSON type of the element was
* an actual string.
*
* @return The string value.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a string.
*/
inline operator std::string_view() const noexcept(false);
/**
* Read this element as an unsigned integer.
*
* @return The integer value.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer
* @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative
*/
inline operator uint64_t() const noexcept(false);
/**
* Read this element as a signed integer.
*
* @return The integer value.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an integer
* @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits
*/
inline operator int64_t() const noexcept(false);
/**
* Read this element as a double.
*
* @return The double value.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not a number
* @exception simdjson_error(NUMBER_OUT_OF_RANGE) if the integer doesn't fit in 64 bits or is negative
* NOTE(review): the NUMBER_OUT_OF_RANGE wording is identical to operator uint64_t and looks
* copy-pasted; a double may be negative, so confirm when this conversion reports that error.
*/
inline operator double() const noexcept(false);
/**
* Read this element as a JSON array.
*
* @return The JSON array.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array
*/
inline operator array() const noexcept(false);
/**
* Read this element as a JSON object (key/value pairs).
*
* @return The JSON object.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an object
*/
inline operator object() const noexcept(false);
/**
* Iterate over each element in this array.
*
* @return The beginning of the iteration.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array
*/
inline dom::array::iterator begin() const noexcept(false);
/**
* Iterate over each element in this array.
*
* @return The end of the iteration.
* @exception simdjson_error(INCORRECT_TYPE) if the JSON element is not an array
*/
inline dom::array::iterator end() const noexcept(false);
#endif // SIMDJSON_EXCEPTIONS
/**
* Get the value associated with the given key.
*
* The key will be matched against **unescaped** JSON:
*
* dom::parser parser;
* parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1
* parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD
*
* @param key The key to look up.
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
* - INCORRECT_TYPE if this is not an object
*/
inline simdjson_result<element> operator[](const std::string_view &key) const noexcept;
/**
* Get the value associated with the given key.
*
* The key will be matched against **unescaped** JSON:
*
* dom::parser parser;
* parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1
* parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD
*
* @param key The key to look up, as a C string.
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
* - INCORRECT_TYPE if this is not an object
*/
inline simdjson_result<element> operator[](const char *key) const noexcept;
/**
* Get the value associated with the given JSON pointer.
*
* dom::parser parser;
* element doc = parser.parse(R"({ "foo": { "a": [ 10, 20, 30 ] }})");
* doc.at("/foo/a/1") == 20
* doc.at("/")["foo"]["a"].at(1) == 20
* doc.at("")["foo"]["a"].at(1) == 20
*
* @param json_pointer The JSON pointer to resolve, starting from this element.
* @return The value associated with the given JSON pointer, or:
* - NO_SUCH_FIELD if a field does not exist in an object
* - INDEX_OUT_OF_BOUNDS if an array index is larger than an array length
* - INCORRECT_TYPE if a non-integer is used to access an array
* - INVALID_JSON_POINTER if the JSON pointer is invalid and cannot be parsed
*/
inline simdjson_result<element> at(const std::string_view &json_pointer) const noexcept;
/**
* Get the value at the given index.
*
* @param index The position of the wanted element within the array.
* @return The value at the given index, or:
* - INDEX_OUT_OF_BOUNDS if the array index is larger than an array length
*/
inline simdjson_result<element> at(size_t index) const noexcept;
/**
* Get the value associated with the given key.
*
* The key will be matched against **unescaped** JSON:
*
* dom::parser parser;
* parser.parse(R"({ "a\n": 1 })")["a\n"].get<uint64_t>().value == 1
* parser.parse(R"({ "a\n": 1 })")["a\\n"].get<uint64_t>().error == NO_SUCH_FIELD
*
* @param key The key to look up.
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
*/
inline simdjson_result<element> at_key(const std::string_view &key) const noexcept;
/**
* Get the value associated with the given key in a case-insensitive manner.
* The same-named method on dom::object documents that it is only guaranteed to work over ASCII
* inputs.
*
* Note: The key will be matched against **unescaped** JSON.
*
* @param key The key to look up, compared without regard to case.
* @return The value associated with this field, or:
* - NO_SUCH_FIELD if the field does not exist in the object
*/
inline simdjson_result<element> at_key_case_insensitive(const std::string_view &key) const noexcept;
/** @private for debugging. Prints out the root element. */
inline bool dump_raw_tape(std::ostream &out) const noexcept;
private:
/** Create an element referencing tape index `json_index` of `doc`; reserved for the friends below. */
really_inline element(const document *doc, size_t json_index) noexcept;
friend class document;
friend class object;
friend class array;
friend struct simdjson_result<element>;
template<typename T>
friend class simdjson::minify;
};
/**
* Key/value pair in an object.
*
* This is what dereferencing an object::iterator yields. The constructor is private and
* dom::object is a friend, so user code receives pairs rather than building them.
*/
class key_value_pair {
public:
/** The key of this pair. */
std::string_view key;
/** The value of this pair. */
element value;
private:
/** Build a pair from a key view and a value element. */
really_inline key_value_pair(const std::string_view &_key, element _value) noexcept;
friend class object;
};
/**
 * Bookkeeping for one open scope (`[` or `{`): where it starts on the tape and how many
 * elements it holds.
 *
 * Expectation: sizeof(scope_descriptor) == 64/8, i.e. the two fields pack into a single
 * 64-bit word. The static_assert below enforces it at compile time.
 */
struct scope_descriptor {
  uint32_t tape_index; // where, on the tape, the scope ([,{) begins
  uint32_t count; // how many elements in the scope
};
static_assert(sizeof(scope_descriptor) == 64 / 8,
              "scope_descriptor is expected to occupy exactly 8 bytes");
/**
* A persistent document parser.
*
* The parser is designed to be reused, holding the internal buffers necessary to do parsing,
* as well as memory for a single document. The parsed document is overwritten on each parse.
*
* This class cannot be copied, only moved, to avoid unintended allocations.
*
* @note This is not thread safe: one parser cannot produce two documents at the same time!
*/
class parser {
public:
/**
* Create a JSON parser.
*
* The new parser will have zero capacity.
*
* @param max_capacity The maximum document length the parser can automatically handle. The parser
* will allocate more capacity on an as needed basis (when it sees documents too big to handle)
* up to this amount. The parser still starts with zero capacity no matter what this number is:
* to allocate an initial capacity, call allocate() after constructing the parser.
* Defaults to SIMDJSON_MAXSIZE_BYTES (the largest single document simdjson can process).
*/
really_inline explicit parser(size_t max_capacity = SIMDJSON_MAXSIZE_BYTES) noexcept;
/**
* Take another parser's buffers and state.
*
* @param other The parser to take. Its capacity is zeroed.
*/
parser(parser &&other) = default;
parser(const parser &) = delete; ///< @private Disallow copying
/**
* Take another parser's buffers and state.
*
* @param other The parser to take. Its capacity is zeroed.
*/
parser &operator=(parser &&other) = default;
parser &operator=(const parser &) = delete; ///< @private Disallow copying
/** Deallocate the JSON parser. */
~parser()=default;
/**
* Load a JSON document from a file and return a reference to it.
*
* dom::parser parser;
* const element doc = parser.load("jsonexamples/twitter.json");
*
* ### IMPORTANT: Document Lifetime
*
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
* documents because it reuses the same buffers, but you *must* use the document before you
* destroy the parser or call parse() again.
*
* ### Parser Capacity
*
* If the parser's current capacity is less than the file length, it will allocate enough capacity
* to handle it (up to max_capacity).
*
* @param path The path to load.
* @return The document, or an error:
* - IO_ERROR if there was an error opening or reading the file.
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
* - other json errors if parsing fails.
*/
inline simdjson_result<element> load(const std::string &path) & noexcept;
inline simdjson_result<element> load(const std::string &path) && = delete ;
/**
* Parse a JSON document and return a temporary reference to it.
*
* dom::parser parser;
* element doc = parser.parse(buf, len);
*
* ### IMPORTANT: Document Lifetime
*
* The JSON document still lives in the parser: this is the most efficient way to parse JSON
* documents because it reuses the same buffers, but you *must* use the document before you
* destroy the parser or call parse() again.
*
* ### REQUIRED: Buffer Padding
*
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
* those bytes are initialized to, as long as they are allocated.
*
* If realloc_if_needed is true, it is assumed that the buffer does *not* have enough padding,
* and it is copied into an enlarged temporary buffer before parsing.
*
* ### Parser Capacity
*
* If the parser's current capacity is less than len, it will allocate enough capacity
* to handle it (up to max_capacity).
*
* @param buf The JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes, unless
* realloc_if_needed is true.
* @param len The length of the JSON.
* @param realloc_if_needed Whether to reallocate and enlarge the JSON buffer to add padding.
* @return The document, or an error:
* - MEMALLOC if realloc_if_needed is true or the parser does not have enough capacity,
* and memory allocation fails.
* - CAPACITY if the parser does not have enough capacity and len > max_capacity.
* - other json errors if parsing fails.
*/
inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) & noexcept;
inline simdjson_result<element> parse(const uint8_t *buf, size_t len, bool realloc_if_needed = true) && =delete;
/** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
really_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) & noexcept;
really_inline simdjson_result<element> parse(const char *buf, size_t len, bool realloc_if_needed = true) && =delete;
/** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
really_inline simdjson_result<element> parse(const std::string &s) & noexcept;
really_inline simdjson_result<element> parse(const std::string &s) && =delete;
/** @overload parse(const uint8_t *buf, size_t len, bool realloc_if_needed) */
really_inline simdjson_result<element> parse(const padded_string &s) & noexcept;
really_inline simdjson_result<element> parse(const padded_string &s) && =delete;
/** @private We do not want to allow implicit conversion from C string to std::string. */
really_inline simdjson_result<element> parse(const char *buf) noexcept = delete;
/**
* Load a file containing many JSON documents.
*
* dom::parser parser;
* for (const element doc : parser.load_many(path)) {
* cout << std::string(doc["title"]) << endl;
* }
*
* ### Format
*
* The file must contain a series of one or more JSON documents, concatenated into a single
* buffer, separated by whitespace. It effectively parses until it has a fully valid document,
* then starts parsing the next document at that point. (It does this with more parallelism and
* lookahead than you might think, though.)
*
* Documents that consist of an object or array may omit the whitespace between them, concatenating
* with no separator. Documents that consist of a single primitive (i.e. documents that are not
* arrays or objects) MUST be separated with whitespace.
*
* ### Error Handling
*
* All errors are returned during iteration: if there is a global error such as memory allocation,
* it will be yielded as the first result. Iteration always stops after the first error.
*
* As with all other simdjson methods, non-exception error handling is readily available through
* the same interface, requiring you to check the error before using the document:
*
* dom::parser parser;
* for (auto [doc, error] : parser.load_many(path)) {
* if (error) { cerr << error << endl; exit(1); }
* cout << std::string(doc["title"]) << endl;
* }
*
* ### Threads
*
* When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
* hood to do some lookahead.
*
* ### Parser Capacity
*
* If the parser's current capacity is less than batch_size, it will allocate enough capacity
* to handle it (up to max_capacity).
*
* @param path The path to the file containing the concatenated JSON documents.
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
* spot is cache-related: small enough to fit in cache, yet big enough to
* parse as many documents as possible in one tight loop.
* Defaults to DEFAULT_BATCH_SIZE (1,000,000 bytes, about 1MB), which has been a reasonable sweet spot in our tests.
* @return The stream. If there is an error, it will be returned during iteration. An empty input
* will yield 0 documents rather than an EMPTY error. Errors:
* - IO_ERROR if there was an error opening or reading the file.
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails.
* - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
* - other json errors if parsing fails.
*/
inline document_stream load_many(const std::string &path, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
/**
* Parse a buffer containing many JSON documents.
*
* dom::parser parser;
* for (const element doc : parser.parse_many(buf, len)) {
* cout << std::string(doc["title"]) << endl;
* }
*
* ### Format
*
* The buffer must contain a series of one or more JSON documents, concatenated into a single
* buffer, separated by whitespace. It effectively parses until it has a fully valid document,
* then starts parsing the next document at that point. (It does this with more parallelism and
* lookahead than you might think, though.)
*
* Documents that consist of an object or array may omit the whitespace between them, concatenating
* with no separator. Documents that consist of a single primitive (i.e. documents that are not
* arrays or objects) MUST be separated with whitespace.
*
* ### Error Handling
*
* All errors are returned during iteration: if there is a global error such as memory allocation,
* it will be yielded as the first result. Iteration always stops after the first error.
*
* As with all other simdjson methods, non-exception error handling is readily available through
* the same interface, requiring you to check the error before using the document:
*
* dom::parser parser;
* for (auto [doc, error] : parser.parse_many(buf, len)) {
* if (error) { cerr << error << endl; exit(1); }
* cout << std::string(doc["title"]) << endl;
* }
*
* ### REQUIRED: Buffer Padding
*
* The buffer must have at least SIMDJSON_PADDING extra allocated bytes. It does not matter what
* those bytes are initialized to, as long as they are allocated.
*
* ### Threads
*
* When compiled with SIMDJSON_THREADS_ENABLED, this method will use a single thread under the
* hood to do some lookahead.
*
* ### Parser Capacity
*
* If the parser's current capacity is less than batch_size, it will allocate enough capacity
* to handle it (up to max_capacity).
*
* @param buf The concatenated JSON to parse. Must have at least len + SIMDJSON_PADDING allocated bytes.
* @param len The length of the concatenated JSON.
* @param batch_size The batch size to use. MUST be larger than the largest document. The sweet
* spot is cache-related: small enough to fit in cache, yet big enough to
* parse as many documents as possible in one tight loop.
* Defaults to DEFAULT_BATCH_SIZE (1,000,000 bytes, about 1MB), which has been a reasonable sweet spot in our tests.
* @return The stream. If there is an error, it will be returned during iteration. An empty input
* will yield 0 documents rather than an EMPTY error. Errors:
* - MEMALLOC if the parser does not have enough capacity and memory allocation fails
* - CAPACITY if the parser does not have enough capacity and batch_size > max_capacity.
* - other json errors if parsing fails.
*/
inline document_stream parse_many(const uint8_t *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
/** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
inline document_stream parse_many(const char *buf, size_t len, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
/** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
inline document_stream parse_many(const std::string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
/** @overload parse_many(const uint8_t *buf, size_t len, size_t batch_size) */
inline document_stream parse_many(const padded_string &s, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept;
/** @private We do not want to allow implicit conversion from C string to std::string. */
really_inline simdjson_result<element> parse_many(const char *buf, size_t batch_size = DEFAULT_BATCH_SIZE) noexcept = delete;
/**
* Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
* and `max_depth` depth.
*
* @param capacity The new capacity.
* @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
* @return The error, if there is one.
*/
WARN_UNUSED inline error_code allocate(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept;
/**
* @private deprecated because it returns bool instead of error_code, which is our standard for
* failures. Use allocate() instead.
*
* Ensure this parser has enough memory to process JSON documents up to `capacity` bytes in length
* and `max_depth` depth.
*
* @param capacity The new capacity.
* @param max_depth The new max_depth. Defaults to DEFAULT_MAX_DEPTH.
* @return true if successful, false if allocation failed.
*/
[[deprecated("Use allocate() instead.")]]
WARN_UNUSED inline bool allocate_capacity(size_t capacity, size_t max_depth = DEFAULT_MAX_DEPTH) noexcept;
/**
* The largest document this parser can support without reallocating.
*
* @return Current capacity, in bytes.
*/
really_inline size_t capacity() const noexcept;
/**
* The largest document this parser can automatically support.
*
* The parser may reallocate internal buffers as needed up to this amount.
*
* @return Maximum capacity, in bytes.
*/
really_inline size_t max_capacity() const noexcept;
/**
* The maximum level of nested object and arrays supported by this parser.
*
* @return Maximum depth, as a number of nesting levels.
*/
really_inline size_t max_depth() const noexcept;
/**
* Set max_capacity. This is the largest document this parser can automatically support.
*
* The parser may reallocate internal buffers as needed up to this amount.
*
* This call will not allocate or deallocate, even if capacity is currently above max_capacity.
*
* @param max_capacity The new maximum capacity, in bytes.
*/
really_inline void set_max_capacity(size_t max_capacity) noexcept;
/** @private Use the new DOM API instead */
class Iterator;
/** @private Use simdjson_error instead */
using InvalidJSON [[deprecated("Use simdjson_error instead")]] = simdjson_error;
/** @private Next location to write to in the tape */
uint32_t current_loc{0};
/** @private Number of structural indices passed from stage 1 to stage 2 */
uint32_t n_structural_indexes{0};
/** @private Structural indices passed from stage 1 to stage 2 */
std::unique_ptr<uint32_t[]> structural_indexes{};
/** @private Tape location of each open { or [ */
std::unique_ptr<scope_descriptor[]> containing_scope{};
#ifdef SIMDJSON_USE_COMPUTED_GOTO
/** @private Return address of each open { or [ */
std::unique_ptr<void*[]> ret_address{};
#else
/** @private Return address of each open { or [ */
std::unique_ptr<char[]> ret_address{};
#endif
/** @private Next write location in the string buf for stage 2 parsing */
uint8_t *current_string_buf_loc{};
/** @private Use `if (parser.parse(...).error())` instead */
bool valid{false};
/** @private Use `parser.parse(...).error()` instead */
error_code error{UNINITIALIZED};
/** @private Use `parser.parse(...).value()` instead */
document doc{};
/** @private returns true if the document parsed was valid */
[[deprecated("Use the result of parser.parse() instead")]]
inline bool is_valid() const noexcept;
/**
* @private return an error code corresponding to the last parsing attempt (see
* simdjson.h); returns UNINITIALIZED if no parsing was attempted
*/
[[deprecated("Use the result of parser.parse() instead")]]
inline int get_error_code() const noexcept;
/** @private return the string equivalent of "get_error_code" */
[[deprecated("Use error_message() on the result of parser.parse() instead, or cout << error")]]
inline std::string get_error_message() const noexcept;
/** @private */
[[deprecated("Use cout << on the result of parser.parse() instead")]]
inline bool print_json(std::ostream &os) const noexcept;