Skip to content

Commit fe9c79d

Browse files
committed
Updated some performance evaluation code.
1 parent 9ed3a4a commit fe9c79d

File tree

3 files changed

+56
-29
lines changed

3 files changed

+56
-29
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ EXECUTABLES=parse
1515

1616
all: $(EXECUTABLES)
1717

18-
parse: main.cpp common_defs.h
18+
parse: main.cpp common_defs.h linux-perf-events.h
1919
$(CXX) $(CXXFLAGS) -o parse main.cpp
2020

2121

linux-perf-events.h

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,58 +11,75 @@
1111
#include <cstring> // for memset
1212
#include <stdexcept>
1313

14+
#include <vector>
1415

1516
template <int TYPE = PERF_TYPE_HARDWARE>
1617
class LinuxEvents {
17-
1818
int fd;
1919
perf_event_attr attribs;
20-
20+
int num_events;
21+
std::vector<uint64_t> temp_result_vec;
22+
std::vector<uint64_t> ids;
2123
public:
22-
LinuxEvents(int config) : fd(0) {
24+
LinuxEvents(std::vector<int> config_vec) : fd(0) {
2325
memset(&attribs, 0, sizeof(attribs));
2426
attribs.type = TYPE;
2527
attribs.size = sizeof(attribs);
26-
attribs.config = config;
2728
attribs.disabled = 1;
2829
attribs.exclude_kernel = 1;
2930
attribs.exclude_hv = 1;
3031

32+
attribs.sample_period = 0;
33+
attribs.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
3134
const int pid = 0; // the current process
3235
const int cpu = -1; // all CPUs
33-
const int group = -1; // no group
3436
const unsigned long flags = 0;
35-
fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
36-
if (fd == -1) {
37-
report_error("perf_event_open");
37+
38+
int group = -1; // no group
39+
num_events = config_vec.size();
40+
u32 i = 0;
41+
for (auto config: config_vec) {
42+
attribs.config = config;
43+
fd = syscall(__NR_perf_event_open, &attribs, pid, cpu, group, flags);
44+
if (fd == -1) {
45+
report_error("perf_event_open");
46+
}
47+
ioctl(fd, PERF_EVENT_IOC_ID, &ids[i++]);
48+
if (group == -1) {
49+
group = fd;
50+
}
3851
}
52+
53+
temp_result_vec.resize(num_events*2 + 1);
3954
}
4055

4156
~LinuxEvents() {
4257
close(fd);
4358
}
4459

45-
void start() {
46-
if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) == -1) {
60+
really_inline void start() {
61+
if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
4762
report_error("ioctl(PERF_EVENT_IOC_RESET)");
4863
}
4964

50-
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) == -1) {
65+
if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
5166
report_error("ioctl(PERF_EVENT_IOC_ENABLE)");
5267
}
5368
}
5469

55-
unsigned long end() {
56-
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0) == -1) {
70+
really_inline void end(std::vector<unsigned long long> & results) {
71+
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
5772
report_error("ioctl(PERF_EVENT_IOC_DISABLE)");
5873
}
5974

60-
unsigned long result;
61-
if (read(fd, &result, sizeof(result)) == -1) {
75+
if (read(fd, &temp_result_vec[0], temp_result_vec.size() * 8) == -1) {
6276
report_error("read");
6377
}
64-
65-
return result;
78+
// with PERF_FORMAT_GROUP | PERF_FORMAT_ID the buffer is laid out as [nr, value0, id0, value1, id1, ...], so the counter values are in slots 1,3,5, ...
79+
// TODO: to be safe, match the id stored after each value (slots 2,4,6, ...) against the ids obtained earlier via PERF_EVENT_IOC_ID instead of relying on ordering
80+
for (u32 i = 1; i < temp_result_vec.size(); i+=2) {
81+
results[i/2] = temp_result_vec[i];
82+
}
6683
}
6784

6885
private:
@@ -72,3 +89,5 @@ class LinuxEvents {
7289

7390
};
7491
#endif
92+
93+

main.cpp

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#include "linux-perf-events.h"
21
#include <iostream>
32
#include <iomanip>
43
#include <chrono>
@@ -13,6 +12,7 @@
1312
#include <x86intrin.h>
1413
#include <assert.h>
1514
#include "common_defs.h"
15+
#include "linux-perf-events.h"
1616

1717
using namespace std;
1818

@@ -1200,34 +1200,42 @@ int main(int argc, char * argv[]) {
12001200
#endif
12011201

12021202
#ifndef SQUASH_COUNTERS
1203-
LinuxEvents<PERF_TYPE_HARDWARE> cycles(PERF_COUNT_HW_CPU_CYCLES);
1204-
LinuxEvents<PERF_TYPE_HARDWARE> instructions(PERF_COUNT_HW_INSTRUCTIONS);
1203+
vector<int> evts;
1204+
evts.push_back(PERF_COUNT_HW_CPU_CYCLES);
1205+
evts.push_back(PERF_COUNT_HW_INSTRUCTIONS);
1206+
LinuxEvents<PERF_TYPE_HARDWARE> unified(evts);
1207+
vector<u64> results;
1208+
results.resize(evts.size());
12051209
unsigned long cy1 = 0, cy2 = 0, cy3 = 0, cy4 = 0;
12061210
unsigned long cl1 = 0, cl2 = 0, cl3 = 0, cl4 = 0;
12071211
#endif
12081212
for (u32 i = 0; i < iterations; i++) {
12091213
auto start = std::chrono::steady_clock::now();
12101214
#ifndef SQUASH_COUNTERS
1211-
cycles.start(); instructions.start();
1215+
unified.start();
12121216
#endif
12131217
find_structural_bits(p.first, p.second, pj);
12141218
#ifndef SQUASH_COUNTERS
1215-
cl1 += instructions.end(); cy1 += cycles.end();
1216-
cycles.start(); instructions.start();
1219+
unified.end(results);
1220+
cy1 += results[0]; cl1 += results[1];
1221+
unified.start();
12171222
#endif
12181223
flatten_indexes(p.second, pj);
12191224
#ifndef SQUASH_COUNTERS
1220-
cl2 += instructions.end(); cy2 += cycles.end();
1221-
cycles.start(); instructions.start();
1225+
unified.end(results);
1226+
cy2 += results[0]; cl2 += results[1];
1227+
unified.start();
12221228
#endif
12231229
ape_machine(p.first, p.second, pj);
12241230
#ifndef SQUASH_COUNTERS
1225-
cl3 += instructions.end(); cy3 += cycles.end();
1226-
cycles.start(); instructions.start();
1231+
unified.end(results);
1232+
cy3 += results[0]; cl3 += results[1];
1233+
unified.start();
12271234
#endif
12281235
shovel_machine(p.first, p.second, pj);
12291236
#ifndef SQUASH_COUNTERS
1230-
cl4 += instructions.end(); cy4 += cycles.end();
1237+
unified.end(results);
1238+
cy4 += results[0]; cl4 += results[1];
12311239
#endif
12321240
auto end = std::chrono::steady_clock::now();
12331241
std::chrono::duration<double> secs = end - start;

0 commit comments

Comments
 (0)