forked from apache/arrow
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbenchmark_util.h
More file actions
139 lines (113 loc) · 4.55 KB
/
Copy pathbenchmark_util.h
File metadata and controls
139 lines (113 loc) · 4.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <algorithm>
#include <cstdint>
#include <string>
#include "benchmark/benchmark.h"
#include "arrow/util/cpu_info.h"
namespace arrow {
// Google Benchmark changed the element type accepted by Benchmark::Args()
// from int to int64_t between releases. It provides no version macros to
// tell the releases apart, so the element type is deduced from the signature
// of Benchmark::Args() itself.

// Primary template: declared only. The specialization below is the single
// usable form.
template <class MemberFn>
struct BenchmarkArgsType;

// Matches a pointer to a Benchmark member function that takes a
// `const std::vector<Element>&` and returns a `Benchmark*`, and exposes
// `Element` as the nested `type`.
template <class Element>
struct BenchmarkArgsType<benchmark::internal::Benchmark* (
    benchmark::internal::Benchmark::*)(const std::vector<Element>&)> {
  using type = Element;
};

// Element type of the vector taken by Benchmark::Args().
using ArgsType =
    BenchmarkArgsType<decltype(&benchmark::internal::Benchmark::Args)>::type;
using internal::CpuInfo;

// CpuInfo instance the cache sizes below are read from.
static const CpuInfo* cpu_info = CpuInfo::GetInstance();

// Cache sizes reported by CpuInfo for each cache level.
static const int64_t kL1Size(cpu_info->CacheSize(CpuInfo::CacheLevel::L1));
static const int64_t kL2Size(cpu_info->CacheSize(CpuInfo::CacheLevel::L2));
static const int64_t kL3Size(cpu_info->CacheSize(CpuInfo::CacheLevel::L3));
// Four times the L3 size, i.e. a size that cannot fit in L3.
static const int64_t kCantFitInL3Size(4 * kL3Size);

// Default benchmark sizes: one per cache level, plus one larger than L3.
static const std::vector<int64_t> kMemorySizes{kL1Size, kL2Size, kL3Size,
                                               kCantFitInL3Size};

// Inverse null proportions passed as the second benchmark argument.
// 0 is treated as "no nulls".
static const std::vector<ArgsType> kInverseNullProportions{10000, 100, 10, 2, 1, 0};
// Argument holder for benchmarks whose first argument is a number of items.
//
// The constructor reads the benchmark arguments; the destructor publishes the
// "size" and "null_percent" counters and the number of items processed.
// A reference to `state` is kept, so `state` must outlive this object.
struct GenericItemsArgs {
  // number of items processed per iteration
  const int64_t size;

  // proportion of nulls in generated arrays, always within [0, 1]
  double null_proportion;

  // Reads `size` from state.range(0) and derives `null_proportion` from the
  // inverse null proportion stored in state.range(1).
  explicit GenericItemsArgs(benchmark::State& state)
      : size(state.range(0)),
        null_proportion(NullProportionFromInverse(state.range(1))),
        state_(state) {}

  // Reports the counters and the processed item count on scope exit.
  ~GenericItemsArgs() {
    state_.counters["size"] = static_cast<double>(size);
    state_.counters["null_percent"] = null_proportion * 100;
    state_.SetItemsProcessed(state_.iterations() * size);
  }

 private:
  // Converts an inverse null proportion N into the proportion 1/N, clamped to
  // at most 1. A value of 0 means "no nulls"; negative values are handled the
  // same way so that the result can never be a negative proportion.
  static double NullProportionFromInverse(int64_t inverse_null_proportion) {
    if (inverse_null_proportion <= 0) {
      return 0.0;
    }
    return std::min(1., 1. / static_cast<double>(inverse_null_proportion));
  }

  benchmark::State& state_;
};
// Configures `bench` to report in microseconds and registers one argument
// pair (size, inverse null proportion) for every combination of `sizes` and
// kInverseNullProportions.
//
// `sizes` defaults to kMemorySizes.
//
// Declared `static inline` because the definition lives in a header: a plain
// definition would be emitted by every translation unit that includes it and
// clash at link time. Internal linkage also matches the `static` constants
// this function reads.
static inline void BenchmarkSetArgsWithSizes(
    benchmark::internal::Benchmark* bench,
    const std::vector<int64_t>& sizes = kMemorySizes) {
  bench->Unit(benchmark::kMicrosecond);

  for (const auto size : sizes) {
    for (const auto inverse_null_proportion : kInverseNullProportions) {
      // The cast is needed because the element type of Args() (ArgsType) may
      // differ from int64_t.
      bench->Args({static_cast<ArgsType>(size), inverse_null_proportion});
    }
  }
}
// Registers the benchmark arguments for every size in kMemorySizes by
// forwarding to BenchmarkSetArgsWithSizes().
//
// Declared `static inline` because the definition lives in a header: a plain
// definition would be emitted by every translation unit that includes it and
// clash at link time.
static inline void BenchmarkSetArgs(benchmark::internal::Benchmark* bench) {
  BenchmarkSetArgsWithSizes(bench, kMemorySizes);
}
// Registers the benchmark arguments for a single size, kL1Size.
//
// Declared `static inline` because the definition lives in a header: a plain
// definition would be emitted by every translation unit that includes it and
// clash at link time.
static inline void RegressionSetArgs(benchmark::internal::Benchmark* bench) {
  // Regression benchmarks do not need to account for the cache hierarchy,
  // thus optimize for the best case.
  BenchmarkSetArgsWithSizes(bench, {kL1Size});
}
// RAII struct to handle some of the boilerplate in regression benchmarks.
//
// The constructor reads the benchmark arguments; the destructor publishes the
// "size" and "null_percent" counters and either the bytes or the items
// processed. A reference to `state` is kept, so `state` must outlive this
// object.
struct RegressionArgs {
  // amount of work per iteration: a number of bytes when size_is_bytes is
  // true, a number of items otherwise
  const int64_t size;

  // proportion of nulls in generated arrays, always within [0, 1]
  double null_proportion;

  // Reads `size` from state.range(0) and derives `null_proportion` from the
  // inverse null proportion stored in state.range(1).
  //
  // If size_is_bytes is true, then it's a number of bytes, otherwise it's the
  // number of items processed (for reporting)
  explicit RegressionArgs(benchmark::State& state, bool size_is_bytes = true)
      : size(state.range(0)),
        null_proportion(NullProportionFromInverse(state.range(1))),
        state_(state),
        size_is_bytes_(size_is_bytes) {}

  // Reports the counters and the processed bytes/items on scope exit.
  ~RegressionArgs() {
    state_.counters["size"] = static_cast<double>(size);
    state_.counters["null_percent"] = null_proportion * 100;
    if (size_is_bytes_) {
      state_.SetBytesProcessed(state_.iterations() * size);
    } else {
      state_.SetItemsProcessed(state_.iterations() * size);
    }
  }

 private:
  // Converts an inverse null proportion N into the proportion 1/N, clamped to
  // at most 1. A value of 0 means "no nulls"; negative values are handled the
  // same way so that the result can never be a negative proportion.
  static double NullProportionFromInverse(int64_t inverse_null_proportion) {
    if (inverse_null_proportion <= 0) {
      return 0.0;
    }
    return std::min(1., 1. / static_cast<double>(inverse_null_proportion));
  }

  benchmark::State& state_;
  bool size_is_bytes_;
};
} // namespace arrow