Skip to content

Commit fb65be6

Browse files
committed
Major surgery.
1 parent 726eb5a commit fb65be6

26 files changed

+37184
-1285
lines changed

Makefile

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,32 +6,48 @@
66

77
.PHONY: clean cleandist
88

9-
CXXFLAGS = -std=c++11 -O2 -march=native -Wall -Wextra -Wshadow -Iinclude -Iinclude/linux -Idependencies/double-conversion -Ldependencies/double-conversion/release
9+
CXXFLAGS = -std=c++11 -O2 -march=native -Wall -Wextra -Wshadow -Iinclude -Ibenchmark/linux -Idependencies/double-conversion -Idependencies/rapidjson/include -Ldependencies/double-conversion/release
1010
LIBFLAGS = -ldouble-conversion
1111

12-
EXECUTABLES=parse jsoncheck
13-
HEADERS=include/common_defs.h include/jsonioutil.h include/linux/linux-perf-events.h include/simdjson_internal.h include/stage1_find_marks.h include/stage2_flatten.h include/stage3_ape_machine.h include/stage4_shovel_machine.h
14-
LIBFILES=src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage3_ape_machine.cpp src/stage4_shovel_machine.cpp
12+
EXECUTABLES=parse jsoncheck minifiercompetition parsingcompetition
13+
HEADERS=include/jsonparser.h include/common_defs.h include/jsonioutil.h benchmark/benchmark.h benchmark/linux/linux-perf-events.h include/simdprune_tables.h include/simdjson_internal.h include/stage1_find_marks.h include/stage2_flatten.h include/stage3_ape_machine.h include/stage4_shovel_machine.h include/jsonminifier.h
14+
LIBFILES=src/jsonioutil.cpp src/jsonparser.cpp src/stage1_find_marks.cpp src/stage2_flatten.cpp src/stage3_ape_machine.cpp src/stage4_shovel_machine.cpp src/jsonminifier.cpp
1515
EXTRA_EXECUTABLES=parsenocheesy parsenodep8
1616

17-
LIDDOUBLE:=dependencies/double-conversion/release/libdouble-conversion.a
17+
LIBDOUBLE:=dependencies/double-conversion/release/libdouble-conversion.a
18+
RAPIDJSON_INCLUDE:=dependencies/rapidjson/include
1819

19-
LIBS=$(LIDDOUBLE)
20+
LIBS=$(RAPIDJSON_INCLUDE) $(LIBDOUBLE)
2021

2122
all: $(LIBS) $(EXECUTABLES)
2223

2324
test: jsoncheck
2425
./jsoncheck
2526

26-
$(LIDDOUBLE) : dependencies/double-conversion/README.md
27+
$(RAPIDJSON_INCLUDE):
28+
git submodule update --init --recursive
29+
30+
$(LIBDOUBLE) : dependencies/double-conversion/README.md
2731
cd dependencies/double-conversion/ && mkdir -p release && cd release && cmake .. && make
2832

33+
34+
bench: benchmarks/bench.cpp $(RAPIDJSON_INCLUDE) $(HEADERS)
35+
$(CXX) -std=c++11 -O3 -o $@ benchmarks/bench.cpp -I$(RAPIDJSON_INCLUDE) -Iinclude -march=native -lm -Wall -Wextra -Wno-narrowing
36+
37+
38+
2939
parse: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
3040
$(CXX) $(CXXFLAGS) -o parse $(LIBFILES) benchmark/parse.cpp $(LIBFLAGS)
3141

3242
jsoncheck:tests/jsoncheck.cpp $(HEADERS) $(LIBFILES)
3343
$(CXX) $(CXXFLAGS) -o jsoncheck $(LIBFILES) tests/jsoncheck.cpp -I. $(LIBFLAGS)
3444

45+
minifiercompetition: benchmark/minifiercompetition.cpp $(HEADERS) $(LIBFILES)
46+
$(CXX) $(CXXFLAGS) -o minifiercompetition $(LIBFILES) benchmark/minifiercompetition.cpp -I. $(LIBFLAGS)
47+
48+
parsingcompetition: benchmark/parsingcompetition.cpp $(HEADERS) $(LIBFILES)
49+
$(CXX) $(CXXFLAGS) -o parsingcompetition $(LIBFILES) benchmark/parsingcompetition.cpp -I. $(LIBFLAGS)
50+
3551

3652
parsehisto: benchmark/parse.cpp $(HEADERS) $(LIBFILES)
3753
$(CXX) $(CXXFLAGS) -o parsehisto benchmark/parse.cpp $(LIBFILES) $(LIBFLAGS) -DBUILDHISTOGRAM
Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ const char *unitname = "cycles";
1717
: \
1818
: /* no read only */ \
1919
"%rax", "%rbx", "%rcx", "%rdx" /* clobbers */ \
20-
); \
20+
); \
2121
(cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
2222
} while (0)
2323

@@ -31,7 +31,7 @@ const char *unitname = "cycles";
3131
: "=r"(cyc_high), "=r"(cyc_low) \
3232
: /* no read only registers */ \
3333
: "%rax", "%rbx", "%rcx", "%rdx" /* clobbers */ \
34-
); \
34+
); \
3535
(cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \
3636
} while (0)
3737

@@ -106,9 +106,9 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
106106
float cycle_per_op = (min_diff) / (double)S; \
107107
float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \
108108
if (verbose) \
109-
printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
109+
printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
110110
if (verbose) \
111-
printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
111+
printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
112112
if (verbose) \
113113
printf("\n"); \
114114
if (!verbose) \
@@ -117,7 +117,7 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
117117
} while (0)
118118

119119
// like BEST_TIME, but no check
120-
#define BEST_TIME_NOCHECK(test, pre, repeat, size, verbose) \
120+
#define BEST_TIME_NOCHECK(test, pre, repeat, size, verbose) \
121121
do { \
122122
if (global_rdtsc_overhead == UINT64_MAX) { \
123123
RDTSC_SET_OVERHEAD(rdtsc_overhead_func(1), repeat); \
@@ -143,10 +143,10 @@ uint64_t global_rdtsc_overhead = (uint64_t)UINT64_MAX;
143143
float cycle_per_op = (min_diff) / (double)S; \
144144
float avg_cycle_per_op = (sum_diff) / ((double)S * repeat); \
145145
if (verbose) \
146-
printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
146+
printf(" %.3f %s per input byte (best) ", cycle_per_op, unitname); \
147+
if (verbose) \
148+
printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
147149
if (verbose) \
148-
printf(" %.3f %s per input byte (avg) ", avg_cycle_per_op, unitname); \
149-
if (verbose) \
150150
printf("\n"); \
151151
if (!verbose) \
152152
printf(" %.3f ", cycle_per_op); \
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
// https://github.com/WojciechMula/toys/blob/master/000helpers/linux-perf-events.h
2+
#pragma once
3+
#ifdef __linux__
4+
5+
#include <asm/unistd.h> // for __NR_perf_event_open
6+
#include <linux/perf_event.h> // for perf event constants
7+
#include <sys/ioctl.h> // for ioctl
8+
#include <unistd.h> // for syscall
9+
10+
#include <cerrno> // for errno
11+
#include <cstring> // for memset
12+
#include <stdexcept>
13+
14+
#include <vector>
15+
16+
template <int TYPE = PERF_TYPE_HARDWARE> class LinuxEvents {
17+
int fd;
18+
perf_event_attr attribs;
19+
int num_events;
20+
std::vector<uint64_t> temp_result_vec;
21+
std::vector<uint64_t> ids;
22+
23+
public:
24+
LinuxEvents(std::vector<int> config_vec) : fd(0) {
25+
memset(&attribs, 0, sizeof(attribs));
26+
attribs.type = TYPE;
27+
attribs.size = sizeof(attribs);
28+
attribs.disabled = 1;
29+
attribs.exclude_kernel = 1;
30+
attribs.exclude_hv = 1;
31+
32+
attribs.sample_period = 0;
33+
attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
34+
const int pid = 0; // the current process
35+
const int cpu = -1; // all CPUs
36+
const unsigned long flags = 0;
37+
38+
int group = -1; // no group
39+
num_events = config_vec.size();
40+
u32 i = 0;
41+
for (auto config : config_vec) {
42+
attribs.config = config;
43+
fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
44+
if (fd == -1) {
45+
report_error("perf_event_open");
46+
}
47+
ioctl(fd, PERF_EVENT_IOC_ID, &ids[i++]);
48+
if (group == -1) {
49+
group = fd;
50+
}
51+
}
52+
53+
temp_result_vec.resize(num_events * 2 + 1);
54+
}
55+
56+
~LinuxEvents() { close(fd); }
57+
58+
really_inline void start() {
59+
if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
60+
report_error("ioctl(PERF_EVENT_IOC_RESET)");
61+
}
62+
63+
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
64+
report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
65+
}
66+
}
67+
68+
really_inline void end(std::vector<unsigned long long> &results) {
69+
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
70+
report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
71+
}
72+
73+
if (read(fd, &temp_result_vec[0], temp_result_vec.size() * 8) == -1) {
74+
report_error("read");
75+
}
76+
// our actual results are in slots 1,3,5, ... of this structure
77+
// we really should be checking our ids obtained earlier to be safe
78+
for (u32 i = 1; i < temp_result_vec.size(); i += 2) {
79+
results[i / 2] = temp_result_vec[i];
80+
}
81+
}
82+
83+
private:
84+
void report_error(const std::string &context) {
85+
throw std::runtime_error(context + ": " + std::string(strerror(errno)));
86+
}
87+
};
88+
#endif

0 commit comments

Comments
 (0)