forked from simdjson/simdjson
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparse_many_test.cpp
More file actions
139 lines (125 loc) · 4.86 KB
/
parse_many_test.cpp
File metadata and controls
139 lines (125 loc) · 4.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#include <cstring>
#ifndef _MSC_VER
#include <dirent.h>
#else
// Microsoft can't be bothered to provide standard utils.
#include <dirent_portable.h>
#endif
#include <unistd.h>
#include <cinttypes>
#include <cstdio>
#include <cstdlib>
#include "simdjson.h"
/**
* Does the file filename end with the given extension.
*/
static bool has_extension(const char *filename, const char *extension) {
const char *ext = strrchr(filename, '.');
return ((ext != nullptr) && (strcmp(ext, extension) == 0));
}
bool starts_with(const char *pre, const char *str) {
size_t len_pre = strlen(pre), len_str = strlen(str);
return len_str < len_pre ? false : strncmp(pre, str, len_pre) == 0;
}
bool contains(const char *pre, const char *str) {
return (strstr(str, pre) != nullptr);
}
bool validate(const char *dirname) {
bool everything_fine = true;
const char *extension1 = ".ndjson";
const char *extension2 = ".jsonl";
const char *extension3 = ".json"; // bad json files shoud fail
size_t dirlen = strlen(dirname);
struct dirent **entry_list;
int c = scandir(dirname, &entry_list, nullptr, alphasort);
if (c < 0) {
fprintf(stderr, "error accessing %s \n", dirname);
return false;
}
if (c == 0) {
printf("nothing in dir %s \n", dirname);
return false;
}
bool *is_file_as_expected = new bool[c];
for (int i = 0; i < c; i++) {
is_file_as_expected[i] = true;
}
size_t how_many = 0;
bool needsep = (strlen(dirname) > 1) && (dirname[strlen(dirname) - 1] != '/');
/*For all files in the folder*/
for (int i = 0; i < c; i++) {
const char *name = entry_list[i]->d_name;
if (has_extension(name, extension1) || has_extension(name, extension2) || has_extension(name, extension3)) {
/* Finding the file path */
printf("validating: file %s ", name);
fflush(nullptr);
size_t namelen = strlen(name);
size_t fullpathlen = dirlen + 1 + namelen + 1;
char *fullpath = static_cast<char *>(malloc(fullpathlen));
snprintf(fullpath, fullpathlen, "%s%s%s", dirname, needsep ? "/" : "", name);
/* The actual test*/
auto [json, error] = simdjson::padded_string::load(fullpath);
if (!error) {
simdjson::dom::parser parser;
++how_many;
for (auto result : parser.parse_many(json)) {
error = result.error();
}
}
printf("%s\n", error ? "ok" : "invalid");
/* Check if the file is supposed to pass or not. Print the results */
if (contains("EXCLUDE", name)) {
// skipping
how_many--;
} else if (starts_with("pass", name) and (has_extension(extension1, name) or has_extension(extension2, name)) and error) {
is_file_as_expected[i] = false;
printf("warning: file %s should pass but it fails. Error is: %s\n",
name, error_message(error));
printf("size of file in bytes: %zu \n", json.size());
everything_fine = false;
} else if ( starts_with("fail", name) and (not starts_with("fail10.json", name)) and !error) {
is_file_as_expected[i] = false;
printf("warning: file %s should fail but it passes.\n", name);
printf("size of file in bytes: %zu \n", json.size());
everything_fine = false;
}
free(fullpath);
}
}
printf("%zu files checked.\n", how_many);
if (everything_fine) {
printf("All ok!\n");
} else {
fprintf(stderr,
"There were problems! Consider reviewing the following files:\n");
for (int i = 0; i < c; i++) {
if (!is_file_as_expected[i]) {
fprintf(stderr, "%s \n", entry_list[i]->d_name);
}
}
}
printf("Note that json stream expects sequences of objects and arrays, so otherwise valid json files can fail by design.\n");
for (int i = 0; i < c; ++i) {
free(entry_list[i]);
}
free(entry_list);
delete[] is_file_as_expected;
return everything_fine;
}
int main(int argc, char *argv[]) {
if (argc != 2) {
std::cerr << "Usage: " << argv[0] << " <directorywithjsonfiles>"
<< std::endl;
#ifndef SIMDJSON_TEST_DATA_DIR
std::cout
<< "We are going to assume you mean to use the 'jsonchecker' directory."
<< std::endl;
return validate("jsonchecker/") ? EXIT_SUCCESS : EXIT_FAILURE;
#else
std::cout << "We are going to assume you mean to use the '"
<< SIMDJSON_TEST_DATA_DIR << "' directory." << std::endl;
return validate(SIMDJSON_TEST_DATA_DIR) ? EXIT_SUCCESS : EXIT_FAILURE;
#endif
}
return validate(argv[1]) ? EXIT_SUCCESS : EXIT_FAILURE;
}