Skip to content

Commit 832cc30

Browse files
committed
add more js integration scripts for creating/converting arrow formats
1 parent 263d06d commit 832cc30

8 files changed

Lines changed: 187 additions & 2 deletions

File tree

js/bin/file-to-stream.js

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#! /usr/bin/env node
2+
3+
// Licensed to the Apache Software Foundation (ASF) under one
4+
// or more contributor license agreements. See the NOTICE file
5+
// distributed with this work for additional information
6+
// regarding copyright ownership. The ASF licenses this file
7+
// to you under the Apache License, Version 2.0 (the
8+
// "License"); you may not use this file except in compliance
9+
// with the License. You may obtain a copy of the License at
10+
//
11+
// http://www.apache.org/licenses/LICENSE-2.0
12+
//
13+
// Unless required by applicable law or agreed to in writing,
14+
// software distributed under the License is distributed on an
15+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
// KIND, either express or implied. See the License for the
17+
// specific language governing permissions and limitations
18+
// under the License.
19+
20+
const fs = require('fs');
21+
const path = require('path');
22+
const streamToIterator = require('stream-to-iterator');
23+
24+
const encoding = 'binary';
25+
const { util: { PipeIterator } } = require('../');
26+
const { Table, serializeStream } = require('../');
27+
28+
(async () => {
    // TODO (ptaylor): implement `serializeStreamAsync` that accepts an
    // AsyncIterable<Buffer>, rather than aggregating into a Table first

    const argCount = process.argv.length;

    // Input: the path given as the first CLI argument, or stdin when absent.
    let source;
    if (argCount < 3) {
        source = process.stdin;
    } else {
        const inputPath = path.resolve(process.argv[2]);
        source = fs.createReadStream(inputPath, { encoding });
    }
    const in_ = streamToIterator(source);

    // Output: the path given as the second CLI argument, or stdout when absent.
    let out;
    if (argCount < 4) {
        out = process.stdout;
    } else {
        const outputPath = path.resolve(process.argv[3]);
        out = fs.createWriteStream(outputPath, { encoding });
    }

    // Read the whole input, build a Table from it, then pipe the result of
    // `serializeStream` to the chosen output.
    const table = await Table.fromAsync(readArrowFile(in_));
    const serialized = serializeStream(table);
    new PipeIterator(serialized, encoding).pipe(out);

})().catch((e) => {
    // Report any failure and exit with a non-zero status.
    console.error(e);
    process.exit(1);
});
38+
39+
/**
 * Drains `stream` and returns everything it yielded as a single Buffer.
 *
 * Chunks that are not already Buffers are converted with
 * `Buffer.from(chunk, encoding)`, using the module-level `encoding`.
 *
 * @param {AsyncIterable<*>|Iterable<*>} stream Source of chunks; anything
 *   consumable by `for await`.
 * @returns {Promise<Buffer>} All chunk bytes in arrival order; a zero-length
 *   Buffer when the source yields nothing.
 */
async function readArrowFile(stream) {
    const chunks = [];
    let totalLength = 0;
    for await (const chunk of stream) {
        const bytes = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk, encoding);
        chunks.push(bytes);
        totalLength += bytes.length;
    }
    // Concatenate once at the end: calling Buffer.concat inside the loop
    // re-copies every byte read so far on each iteration.
    return Buffer.concat(chunks, totalLength);
}

js/bin/json-to-arrow.js

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#! /usr/bin/env node
2+
3+
// Licensed to the Apache Software Foundation (ASF) under one
4+
// or more contributor license agreements. See the NOTICE file
5+
// distributed with this work for additional information
6+
// regarding copyright ownership. The ASF licenses this file
7+
// to you under the Apache License, Version 2.0 (the
8+
// "License"); you may not use this file except in compliance
9+
// with the License. You may obtain a copy of the License at
10+
//
11+
// http://www.apache.org/licenses/LICENSE-2.0
12+
//
13+
// Unless required by applicable law or agreed to in writing,
14+
// software distributed under the License is distributed on an
15+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
// KIND, either express or implied. See the License for the
17+
// specific language governing permissions and limitations
18+
// under the License.
19+
20+
const fs = require('fs');
21+
const glob = require('glob');
22+
const path = require('path');
23+
const { Table } = require('../index');
24+
const { promisify } = require('util');
25+
const { parse } = require('json-bignum');
26+
const argv = require(`command-line-args`)(cliOpts(), { partial: true });
27+
28+
const encoding = 'binary';
29+
const stream = argv.format === 'stream';
30+
const jsonPaths = [...(argv.json || [])];
31+
const arrowPaths = [...(argv.arrow || [])];
32+
33+
if (!jsonPaths.length || !arrowPaths.length || (jsonPaths.length !== arrowPaths.length)) {
34+
return print_usage();
35+
}
36+
37+
const readFile = callResolved(promisify(fs.readFile));
38+
const writeFile = callResolved(promisify(fs.writeFile));
39+
40+
(async () => await Promise.all(jsonPaths.map(async (jPath, i) => {
41+
const aPath = arrowPaths[i];
42+
const arrowTable = Table.from(parse('' + (await readFile(jPath))));
43+
await writeFile(aPath, arrowTable.serialize(encoding, stream), encoding);
44+
})))().catch((e) => { console.error(e); process.exit(1); });
45+
46+
/**
 * Wraps `fn` so that its first argument is passed through `path.resolve`
 * before the call; all remaining arguments are forwarded unchanged.
 *
 * @param {Function} fn Function whose first argument is a file path.
 * @returns {Function} Async wrapper resolving to whatever `fn` resolves to.
 */
function callResolved(fn) {
    return async (path_, ...xs) => {
        const absolutePath = path.resolve(path_);
        const result = await fn(absolutePath, ...xs);
        return result;
    };
}
49+
50+
/**
 * Builds the option definitions passed to `command-line-args` and reused
 * in the usage guide. A fresh array (with fresh default values) is
 * returned on every call.
 *
 * @returns {Array<Object>} Definitions for `format`, `arrow` and `json`.
 */
function cliOpts() {
    const formatOption = {
        type: String,
        name: 'format',
        alias: 'f',
        multiple: false,
        defaultValue: 'file',
        description: 'The Arrow format to write, either "file" or "stream"'
    };
    const arrowOption = {
        type: String,
        name: 'arrow',
        alias: 'a',
        multiple: true,
        defaultValue: [],
        description: 'The Arrow file[s] to write'
    };
    const jsonOption = {
        type: String,
        name: 'json',
        alias: 'j',
        multiple: true,
        defaultValue: [],
        description: 'The JSON file[s] to read'
    };
    return [formatOption, arrowOption, jsonOption];
}
72+
73+
/**
 * Prints the usage guide for this script with `console.log`, then ends the
 * process with exit status 1. This function never returns to its caller.
 *
 * The guide is rendered by `command-line-usage` from three sections: a
 * description, a synopsis, and the option list from `cliOpts()` plus a
 * `help` entry.
 */
function print_usage() {
    const renderUsage = require('command-line-usage');
    const sections = [
        {
            header: 'json-to-arrow',
            content: 'Script for converting a JSON Arrow file to a binary Arrow file'
        },
        {
            header: 'Synopsis',
            content: [
                '$ json-to-arrow.js -j in.json -a out.arrow -f stream'
            ]
        },
        {
            header: 'Options',
            optionList: [
                ...cliOpts(),
                {
                    name: 'help',
                    description: 'Print this usage guide.'
                }
            ]
        },
    ];
    console.log(renderUsage(sections));
    process.exit(1);
}

js/bin/stream-to-file.js

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#! /usr/bin/env node
2+
3+
// Licensed to the Apache Software Foundation (ASF) under one
4+
// or more contributor license agreements. See the NOTICE file
5+
// distributed with this work for additional information
6+
// regarding copyright ownership. The ASF licenses this file
7+
// to you under the Apache License, Version 2.0 (the
8+
// "License"); you may not use this file except in compliance
9+
// with the License. You may obtain a copy of the License at
10+
//
11+
// http://www.apache.org/licenses/LICENSE-2.0
12+
//
13+
// Unless required by applicable law or agreed to in writing,
14+
// software distributed under the License is distributed on an
15+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
// KIND, either express or implied. See the License for the
17+
// specific language governing permissions and limitations
18+
// under the License.
19+
20+
const fs = require('fs');
21+
const path = require('path');
22+
const streamToIterator = require('stream-to-iterator');
23+
24+
const encoding = 'binary';
25+
const { util: { PipeIterator } } = require('../');
26+
const { Table, serializeFile, fromNodeStream } = require('../');
27+
28+
(async () => {
    // TODO (ptaylor): implement `serializeFileAsync` that accepts an
    // AsyncIterable<Buffer>, rather than aggregating into a Table first

    const argCount = process.argv.length;

    // Input: the path given as the first CLI argument, or stdin when absent.
    let source;
    if (argCount < 3) {
        source = process.stdin;
    } else {
        const inputPath = path.resolve(process.argv[2]);
        source = fs.createReadStream(inputPath, { encoding });
    }
    const in_ = streamToIterator(source);

    // Output: the path given as the second CLI argument, or stdout when absent.
    let out;
    if (argCount < 4) {
        out = process.stdout;
    } else {
        const outputPath = path.resolve(process.argv[3]);
        out = fs.createWriteStream(outputPath, { encoding });
    }

    // Build a Table from the incoming data, then pipe the result of
    // `serializeFile` to the chosen output.
    const table = await Table.fromAsync(fromNodeStream(in_));
    const serialized = serializeFile(table);
    new PipeIterator(serialized, encoding).pipe(out);

})().catch((e) => {
    // Report any failure and exit with a non-zero status.
    console.error(e);
    process.exit(1);
});

js/index.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
module.exports = require('./targets/apache-arrow');

js/index.mjs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
export * from './targets/apache-arrow';

js/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
export * from './src/Arrow';

js/package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
"version": "0.3.0",
33
"name": "apache-arrow",
44
"description": "Apache Arrow columnar in-memory format",
5+
"main": "./index",
56
"bin": {
67
"arrow2csv": "bin/arrow2csv.js"
78
},
@@ -53,7 +54,7 @@
5354
],
5455
"dependencies": {
5556
"@types/flatbuffers": "1.6.5",
56-
"@types/node": "9.3.0",
57+
"@types/node": "10.0.8",
5758
"@types/text-encoding-utf-8": "1.0.1",
5859
"command-line-args": "5.0.1",
5960
"command-line-usage": "4.1.0",
@@ -96,6 +97,7 @@
9697
"stream-to-iterator": "3.0.1",
9798
"trash": "4.2.1",
9899
"ts-jest": "22.0.1",
100+
"ts-node": "6.0.3",
99101
"tslint": "5.9.1",
100102
"typedoc": "0.10.0",
101103
"typescript": "2.7.1",

js/tsconfig.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,6 @@
66
},
77
"compilerOptions": {
88
"target": "ESNEXT",
9-
"module": "es2015"
9+
"module": "commonjs"
1010
}
1111
}

0 commit comments

Comments
 (0)