Skip to content

Commit 488fcd7

Browse files
domoritz authored and TheNeuralBit committed
ARROW-12832: [JS] Write benchmarks in TypeScript
Closes apache#10361 from domoritz/ts-perf

Authored-by: Dominik Moritz <domoritz@gmail.com>
Signed-off-by: Brian Hulette <hulettbh@gmail.com>
1 parent 618b286 commit 488fcd7

6 files changed

Lines changed: 83 additions & 85 deletions

File tree

js/DEVELOP.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,9 @@ Uses [lerna](https://github.com/lerna/lerna) to publish each build target to npm
7272

7373
# Running the Performance Benchmarks
7474

75-
First, compile the bundles with `yarn build` and generate perf data with `yarn create:perfdata`. Then you can run the benchmarks with `yarn perf`. You can change the target you want to test by changing the imports in `perf/index.js`. To print the results to stderr as JSON, add the `--json` flag (e.g. `yarn perf --json 2> perf.json`).
75+
First, generate perf data with `yarn create:perfdata`. Then you can run the benchmarks with `yarn perf`. To print the results to stderr as JSON, add the `--json` flag (e.g. `yarn perf --json 2> perf.json`).
76+
77+
You can change the target you want to test by changing the imports in `perf/index.ts`. Note that you need to compile the bundles with `yarn build` before you can import them.
7678

7779
# Updating the Arrow format flatbuffers generated code
7880

js/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
"build": "cross-env NODE_NO_WARNINGS=1 gulp build",
1212
"clean": "cross-env NODE_NO_WARNINGS=1 gulp clean",
1313
"debug": "cross-env NODE_NO_WARNINGS=1 gulp debug",
14-
"perf": "node ./perf/index.js",
14+
"perf": "ts-node-transpile-only ./perf/index.ts",
1515
"test:integration": "node ./bin/integration.js --mode validate",
1616
"create:perfdata": "python ./test/data/tables/generate.py ./test/data/tables/tracks.arrow",
1717
"release": "./npm-release.sh",
js/perf/config.js renamed to js/perf/config.ts

Lines changed: 17 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -15,34 +15,23 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
const fs = require('fs');
19-
const path = require('path');
20-
const glob = require('glob');
18+
import {readFileSync} from 'fs';
19+
import {resolve, parse} from 'path';
20+
import {sync} from 'glob';
2121

22-
const config = [];
23-
const filenames = glob.sync(path.resolve(__dirname, `../test/data/tables/`, `*.arrow`));
22+
const filenames = sync(resolve(__dirname, `../test/data/tables/`, `*.arrow`));
2423

25-
const countBys = {
26-
tracks: ['origin', 'destination']
27-
}
28-
const counts = {
29-
tracks: [
30-
{column: 'lat', test: 'gt', value: 0 },
31-
{column: 'lng', test: 'gt', value: 0 },
32-
{column: 'origin', test: 'eq', value: 'Seattle'},
33-
]
34-
}
24+
export default filenames.map(filename => {
25+
const { name } = parse(filename);
26+
return {
27+
name,
28+
buffers: [readFileSync(filename)],
29+
countBys: ['origin', 'destination'],
30+
counts: [
31+
{column: 'lat', test: 'gt' as 'gt' | 'eq', value: 0 },
32+
{column: 'lng', test: 'gt' as 'gt' | 'eq', value: 0 },
33+
{column: 'origin', test: 'eq' as 'gt' | 'eq', value: 'Seattle'},
34+
],
35+
};
36+
});
3537

36-
for (const filename of filenames) {
37-
const { name } = path.parse(filename);
38-
if (name in counts) {
39-
config.push({
40-
name,
41-
buffers: [fs.readFileSync(filename)],
42-
countBys: countBys[name],
43-
counts: counts[name],
44-
});
45-
}
46-
}
47-
48-
module.exports = config;

js/perf/index.js renamed to js/perf/index.ts

Lines changed: 58 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -15,27 +15,35 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
// Use the ES5 UMD target as perf baseline
19-
// const { predicate, Table, RecordBatchReader } = require('../targets/es5/umd');
20-
// const { predicate, Table, RecordBatchReader } = require('../targets/es5/cjs');
21-
// const { predicate, Table, RecordBatchReader } = require('../targets/es2015/umd');
22-
const { predicate, Table, DataFrame, RecordBatchReader } = require('../targets/es2015/cjs');
23-
const kleur = require('kleur');
24-
const b = require('benny');
18+
// Alternatively, use bundles for performance tests
19+
// import * as Arrow from '../targets/es5/umd';
20+
// import * as Arrow from '../targets/es5/cjs';
21+
// import * as Arrow from '../targets/es2015/umd';
22+
// import * as Arrow from '../targets/es2015/cjs';
23+
24+
import * as Arrow from '../src/Arrow';
25+
26+
import config from './config';
27+
import b from 'benny';
28+
import { CaseResult, Summary } from 'benny/lib/internal/common-types';
29+
import kleur from 'kleur';
30+
31+
const { predicate, Table, DataFrame, RecordBatchReader } = Arrow;
2532
const { col } = predicate;
2633

34+
2735
const args = process.argv.slice(2);
2836
const json = args[0] === '--json';
2937

3038
const formatter = new Intl.NumberFormat();
31-
function formatNumber(number, precision) {
39+
function formatNumber(number: number, precision = 0) {
3240
const rounded = number > precision * 10 ? Math.round(number) : parseFloat((number).toPrecision(precision));
33-
return formatter.format(rounded)
41+
return formatter.format(rounded);
3442
}
3543

36-
const results = []
44+
const results: CaseResult[] = [];
3745

38-
function cycle(result, _summary) {
46+
function cycle(result: CaseResult, _summary: Summary) {
3947
const duration = result.details.median * 1000;
4048
if (json) {
4149
results.push(result);
@@ -45,7 +53,7 @@ function cycle(result, _summary) {
4553
);
4654
}
4755

48-
for (const { name, buffers } of require('./config')) {
56+
for (const { name, buffers } of config) {
4957
b.suite(
5058
`Parse "${name}"`,
5159

@@ -54,42 +62,42 @@ for (const { name, buffers } of require('./config')) {
5462
}),
5563

5664
b.add(`readBatches`, () => {
57-
for (recordBatch of RecordBatchReader.from(buffers)) {}
65+
for (const _recordBatch of RecordBatchReader.from(buffers)) {}
5866
}),
5967

6068
b.cycle(cycle)
6169
);
6270

63-
const table = Table.from(buffers)
71+
const table = Table.from(buffers);
6472
const schema = table.schema;
6573

6674
const suites = [{
6775
name: `Get "${name}" values by index`,
68-
fn(vector) {
76+
fn(vector: Arrow.Column<any>) {
6977
for (let i = -1, n = vector.length; ++i < n;) {
70-
value = vector.get(i);
78+
vector.get(i);
7179
}
7280
}
7381
}, {
7482
name: `Iterate "${name}" vectors`,
75-
fn(vector) { for (value of vector) {} }
83+
fn(vector: Arrow.Column<any>) { for (const _value of vector) {} }
7684
}, {
7785
name: `Slice toArray "${name}" vectors`,
78-
fn(vector) { xs = vector.slice().toArray(); }
86+
fn(vector: Arrow.Column<any>) { vector.slice().toArray(); }
7987
}, {
8088
name: `Slice "${name}" vectors`,
81-
fn(vector) { xs = vector.slice(); }
89+
fn(vector: Arrow.Column<any>) { vector.slice(); }
8290
}];
8391

8492
for (const {name, fn} of suites) {
8593
b.suite(
8694
name,
8795

8896
...schema.fields.map((f, i) => {
89-
const vector = table.getColumnAt(i);
97+
const vector = table.getColumnAt(i)!;
9098
return b.add(`name: '${f.name}', length: ${formatNumber(vector.length)}, type: ${vector.type}`, () => {
91-
fn(vector)
92-
})
99+
fn(vector);
100+
});
93101
}),
94102

95103
b.cycle(cycle)
@@ -98,14 +106,14 @@ for (const { name, buffers } of require('./config')) {
98106
}
99107

100108

101-
for (const { name, buffers, countBys, counts } of require('./config')) {
109+
for (const { name, buffers, countBys, counts } of config) {
102110
const df = DataFrame.from(buffers);
103111

104112
b.suite(
105113
`DataFrame Iterate "${name}"`,
106114

107115
b.add(`length: ${formatNumber(df.length)}`, () => {
108-
for (value of df) {}
116+
for (const _value of df) {}
109117
}),
110118

111119
b.cycle(cycle)
@@ -114,8 +122,8 @@ for (const { name, buffers, countBys, counts } of require('./config')) {
114122
b.suite(
115123
`DataFrame Count By "${name}"`,
116124

117-
...countBys.map((column) => b.add(
118-
`name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column).type}`,
125+
...countBys.map((column: string) => b.add(
126+
`name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}`,
119127
() => df.countBy(column)
120128
)),
121129

@@ -125,10 +133,10 @@ for (const { name, buffers, countBys, counts } of require('./config')) {
125133
b.suite(
126134
`DataFrame Filter-Scan Count "${name}"`,
127135

128-
...counts.map(({ column, test, value }) => b.add(
129-
`name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column).type}, test: ${test}, value: ${value}`,
136+
...counts.map(({ column, test, value }: {column: string; test: 'gt' | 'eq'; value: number | string}) => b.add(
137+
`name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value: ${value}`,
130138
() => {
131-
let filteredDf;
139+
let filteredDf: Arrow.FilteredDataFrame;
132140
if (test == 'gt') {
133141
filteredDf = df.filter(col(column).gt(value));
134142
} else if (test == 'eq') {
@@ -147,10 +155,10 @@ for (const { name, buffers, countBys, counts } of require('./config')) {
147155
b.suite(
148156
`DataFrame Filter-Iterate "${name}"`,
149157

150-
...counts.map(({ column, test, value }) => b.add(
151-
`name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column).type}, test: ${test}, value: ${value}`,
158+
...counts.map(({ column, test, value }: {column: string; test: 'gt' | 'eq'; value: number | string}) => b.add(
159+
`name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value: ${value}`,
152160
() => {
153-
let filteredDf;
161+
let filteredDf: Arrow.FilteredDataFrame;
154162
if (test == 'gt') {
155163
filteredDf = df.filter(col(column).gt(value));
156164
} else if (test == 'eq') {
@@ -160,8 +168,8 @@ for (const { name, buffers, countBys, counts } of require('./config')) {
160168
}
161169

162170
return () => {
163-
for (value of filteredDf) {}
164-
}
171+
for (const _value of filteredDf) {}
172+
};
165173
}
166174
)),
167175

@@ -171,43 +179,43 @@ for (const { name, buffers, countBys, counts } of require('./config')) {
171179
b.suite(
172180
`DataFrame Direct Count "${name}"`,
173181

174-
...counts.map(({ column, test, value }) => b.add(
175-
`name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column).type}, test: ${test}, value: ${value}`,
182+
...counts.map(({ column, test, value }: {column: string; test: 'gt' | 'eq'; value: number | string}) => b.add(
183+
`name: '${column}', length: ${formatNumber(df.length)}, type: ${df.schema.fields.find((c)=> c.name === column)!.type}, test: ${test}, value: ${value}`,
176184
() => {
177-
let colidx = df.schema.fields.findIndex((c)=> c.name === column);
185+
const colidx = df.schema.fields.findIndex((c)=> c.name === column);
178186

179187
if (test == 'gt') {
180188
return () => {
181-
sum = 0;
182-
let batches = df.chunks;
183-
let numBatches = batches.length;
189+
let sum = 0;
190+
const batches = df.chunks;
191+
const numBatches = batches.length;
184192
for (let batchIndex = -1; ++batchIndex < numBatches;) {
185193
// load batches
186194
const batch = batches[batchIndex];
187-
const vector = batch.getChildAt(colidx);
195+
const vector = batch.getChildAt(colidx)!;
188196
// yield all indices
189197
for (let index = -1, length = batch.length; ++index < length;) {
190-
sum += (vector.get(index) >= value);
198+
sum += (vector.get(index) >= value) ? 1 : 0;
191199
}
192200
}
193201
return sum;
194-
}
202+
};
195203
} else if (test == 'eq') {
196204
return () => {
197-
sum = 0;
198-
let batches = df.chunks;
199-
let numBatches = batches.length;
205+
let sum = 0;
206+
const batches = df.chunks;
207+
const numBatches = batches.length;
200208
for (let batchIndex = -1; ++batchIndex < numBatches;) {
201209
// load batches
202210
const batch = batches[batchIndex];
203-
const vector = batch.getChildAt(colidx);
211+
const vector = batch.getChildAt(colidx)!;
204212
// yield all indices
205213
for (let index = -1, length = batch.length; ++index < length;) {
206-
sum += (vector.get(index) === value);
214+
sum += (vector.get(index) === value) ? 1 : 0;
207215
}
208216
}
209217
return sum;
210-
}
218+
};
211219
} else {
212220
throw new Error(`Unrecognized test "${test}"`);
213221
}
@@ -218,7 +226,7 @@ for (const { name, buffers, countBys, counts } of require('./config')) {
218226

219227
b.complete(() => {
220228
// last benchmark finished
221-
json && process.stderr.write(JSON.stringify(results, null, 2))
229+
json && process.stderr.write(JSON.stringify(results, null, 2));
222230
})
223231
);
224232
}

js/test/generate-test-data.ts

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -580,9 +580,7 @@ type TypedArrayConstructor =
580580

581581
const rand = Math.random.bind(Math);
582582
const randomBytes = (length: number) => fillRandom(Uint8Array, length);
583-
const randomString = ((opts) =>
584-
(length: number) => randomatic('?', length, opts)
585-
)({ chars: `abcdefghijklmnopqrstuvwxyz0123456789_` });
583+
const randomString = (length: number) => randomatic('?', length, { chars: `abcdefghijklmnopqrstuvwxyz0123456789_` });
586584

587585
const memoize = (fn: () => any) => ((x?: any) => () => x || (x = fn()))();
588586

js/tsconfig.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
"compilerOptions": {
88
"target": "ESNEXT",
99
"module": "commonjs",
10-
"noEmit": true
10+
"noEmit": true,
11+
"esModuleInterop": true
1112
},
12-
"include": ["src/**/*.ts", "test/**/*.ts"]
13+
"include": ["src/**/*.ts", "test/**/*.ts", "perf/**/*.ts"]
1314
}

0 commit comments

Comments
 (0)