Skip to content

Commit a586fd0

Browse files
committed
Add utility to convert a package name to a bucket
1 parent ff01a8d commit a586fd0

File tree

11 files changed

+1015
-0
lines changed

11 files changed

+1015
-0
lines changed
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
<!--
2+
3+
@license Apache-2.0
4+
5+
Copyright (c) 2022 The Stdlib Authors.
6+
7+
Licensed under the Apache License, Version 2.0 (the "License");
8+
you may not use this file except in compliance with the License.
9+
You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
See the License for the specific language governing permissions and
17+
limitations under the License.
18+
19+
-->
20+
21+
# name2bucket
22+
23+
> Deterministically convert a package name (or a list of package names) to a bucket.
24+
25+
<section class="usage">
26+
27+
## Usage
28+
29+
```javascript
30+
var name2bucket = require( '@stdlib/_tools/pkgs/name2bucket' );
31+
```
32+
33+
#### name2bucket( name, buckets )
34+
35+
Deterministically converts a package `name` (or a list of package names) to a bucket.
36+
37+
```javascript
38+
var out = name2bucket( '@stdlib/math/base/special/sin', 10 );
39+
// returns <number>
40+
```
41+
42+
To convert more than one package `name`, provide a list of package names.
43+
44+
```javascript
45+
var list = [
46+
'@stdlib/math/base/special/sin',
47+
'@stdlib/math/base/special/cos'
48+
];
49+
var out = name2bucket( list, 10 );
50+
// returns [ <number>, <number> ]
51+
```
52+
53+
</section>
54+
55+
<!-- /.usage -->
56+
57+
<section class="notes">
58+
59+
## Notes
60+
61+
- The function maps standalone and non-standalone `stdlib` packages to the same bucket.
62+
63+
```javascript
64+
var list = [
65+
'@stdlib/math/base/special/sin',
66+
'@stdlib/math-base-special-sin'
67+
];
68+
var out = name2bucket( list, 10 );
69+
// returns [...]
70+
71+
var bool = ( out[ 0 ] === out[ 1 ] );
72+
// returns true
73+
```
74+
75+
</section>
76+
77+
<!-- /.notes -->
78+
79+
<section class="examples">
80+
81+
## Examples
82+
83+
<!-- eslint no-undef: "error" -->
84+
85+
```javascript
86+
var join = require( 'path' ).join;
87+
var pkgNames = require( '@stdlib/_tools/pkgs/names' ).sync;
88+
var rootDir = require( '@stdlib/_tools/utils/root-dir' );
89+
var chi2gof = require( '@stdlib/stats/chi2gof' );
90+
var countBy = require( '@stdlib/utils/count-by' );
91+
var objectValues = require( '@stdlib/utils/values' );
92+
var identity = require( '@stdlib/utils/identity-function' );
93+
var name2bucket = require( '@stdlib/_tools/pkgs/name2bucket' );
94+
95+
// Resolve a namespace directory:
96+
var dir = join( rootDir(), 'lib', 'node_modules', '@stdlib', 'math', 'base', 'special' );
97+
98+
// Resolve a list of package names:
99+
var names = pkgNames({
100+
'dir': dir
101+
});
102+
103+
// Place the names into 10 buckets:
104+
var out = name2bucket( names, 10 );
105+
console.log( out.join( '\n' ) );
106+
107+
// Count the number of names in each bin:
108+
var counts = countBy( out, identity );
109+
console.log( counts );
110+
111+
// Determine whether the names are uniformly distributed:
112+
var o = chi2gof( objectValues( counts ), 'discrete-uniform', 0, 9 );
113+
console.log( o.toString() );
114+
```
115+
116+
</section>
117+
118+
<!-- /.examples -->
119+
120+
* * *
121+
122+
<section class="cli">
123+
124+
## CLI
125+
126+
<section class="usage">
127+
128+
### Usage
129+
130+
```text
131+
Usage: stdlib-name2bucket [options] [<name>] --buckets=<num>
132+
133+
Options:
134+
135+
-h, --help Print this message.
136+
-V, --version Print the package version.
137+
--split sep Separator used to split stdin data. Default: /\\r?\\n/.
138+
--buckets num Number of buckets.
139+
```
140+
141+
</section>
142+
143+
<!-- /.usage -->
144+
145+
<section class="notes">
146+
147+
### Notes
148+
149+
- If the split separator is a [regular expression][mdn-regexp], ensure that the `split` option is properly **escaped**.
150+
151+
```bash
152+
# Not escaped...
153+
$ <stdout> | stdlib-name2bucket --split /\r?\n/ --buckets=10
154+
155+
# Escaped...
156+
$ <stdout> | stdlib-name2bucket --split /\\r?\\n/ --buckets=10
157+
```
158+
159+
</section>
160+
161+
<!-- /.notes -->
162+
163+
<section class="examples">
164+
165+
### Examples
166+
167+
```bash
168+
$ stdlib-name2bucket --buckets=10 '@stdlib/math/base/special/sin'
169+
```
170+
171+
To use as part of a [standard stream][standard-stream] pipeline,
172+
173+
```bash
174+
$ echo -n $'@stdlib/math/base\t@stdlib/math\t@stdlib/utils/copy' | stdlib-name2bucket --buckets=10 --split /\\t/
175+
```
176+
177+
</section>
178+
179+
<!-- /.examples -->
180+
181+
</section>
182+
183+
<!-- /.cli -->
184+
185+
<!-- Section for related `stdlib` packages. Do not manually edit this section, as it is automatically populated. -->
186+
187+
<section class="related">
188+
189+
</section>
190+
191+
<!-- /.related -->
192+
193+
<!-- Section for all links. Make sure to keep an empty line after the `section` element and another before the `/section` close. -->
194+
195+
<section class="links">
196+
197+
[mdn-regexp]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
198+
199+
[standard-stream]: http://en.wikipedia.org/wiki/Pipeline_%28Unix%29
200+
201+
</section>
202+
203+
<!-- /.links -->
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
#!/usr/bin/env node
2+
3+
/**
4+
* @license Apache-2.0
5+
*
6+
* Copyright (c) 2022 The Stdlib Authors.
7+
*
8+
* Licensed under the Apache License, Version 2.0 (the "License");
9+
* you may not use this file except in compliance with the License.
10+
* You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing, software
15+
* distributed under the License is distributed on an "AS IS" BASIS,
16+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
* See the License for the specific language governing permissions and
18+
* limitations under the License.
19+
*/
20+
21+
'use strict';
22+
23+
// MODULES //
24+
25+
var resolve = require( 'path' ).resolve;
26+
var readFileSync = require( '@stdlib/fs/read-file' ).sync;
27+
var CLI = require( '@stdlib/cli/ctor' );
28+
var stdin = require( '@stdlib/process/read-stdin' );
29+
var stdinStream = require( '@stdlib/streams/node/stdin' );
30+
var RE_EOL = require( '@stdlib/regexp/eol' ).REGEXP;
31+
var isRegExpString = require( '@stdlib/assert/is-regexp-string' );
32+
var reFromString = require( '@stdlib/utils/regexp-from-string' );
33+
var trim = require( '@stdlib/string/trim' );
34+
var convert = require( './../lib' );
35+
36+
37+
// MAIN //
38+
39+
/**
* Main execution sequence.
*
* ## Notes
*
* -   Parses command-line flags and validates the `--buckets` option.
* -   When `stdin` is not a TTY, reads all of `stdin`, splits the data using the `--split` separator (or an end-of-line regular expression by default), and prints the result of converting each item.
* -   Otherwise, prints the result of converting the first positional argument.
*
* @private
* @returns {void}
*/
function main() {
	var flags;
	var args;
	var opts;
	var cli;
	var N;

	// Create a command-line interface:
	cli = new CLI({
		'pkg': require( './../package.json' ),
		'options': require( './../etc/cli_opts.json' ),
		'help': readFileSync( resolve( __dirname, '..', 'docs', 'usage.txt' ), {
			'encoding': 'utf8'
		})
	});

	// Get any provided command-line options:
	flags = cli.flags();
	if ( flags.help || flags.version ) {
		return;
	}
	// The `buckets` flag is declared as a string option, so convert it to a base-10 integer:
	N = parseInt( flags.buckets, 10 );

	// Fail fast with a readable message when the option is missing (`NaN`) or non-positive, rather than forwarding an unusable bucket count:
	if ( N !== N || N <= 0 ) {
		return cli.error( new Error( 'invalid option. `buckets` option must be a positive integer. Option: `' + flags.buckets + '`.' ) );
	}

	// Get any provided command-line arguments:
	args = cli.args();

	// Check if we are receiving data from `stdin`...
	opts = {};
	if ( !stdinStream.isTTY ) {
		if ( flags.split ) {
			// Allow a plain separator (e.g., `,`) by wrapping it so that it can be parsed as a regular expression string:
			if ( !isRegExpString( flags.split ) ) {
				flags.split = '/'+flags.split+'/';
			}
			opts.split = reFromString( flags.split );
		} else {
			opts.split = RE_EOL;
		}
		return stdin( onRead );
	}
	// When not reading from `stdin`, a package name must be supplied as a positional argument:
	if ( args.length === 0 ) {
		return cli.error( new Error( 'invalid invocation. Must provide a package name.' ) );
	}
	console.log( convert( args[ 0 ], N ) ); // eslint-disable-line no-console

	/**
	* Callback invoked upon reading from `stdin`.
	*
	* @private
	* @param {(Error|null)} error - error object
	* @param {Buffer} data - data
	* @returns {void}
	*/
	function onRead( error, data ) {
		var lines;
		var i;
		if ( error ) {
			return cli.error( error );
		}
		// Trim surrounding whitespace so that a trailing separator does not yield an empty final item:
		lines = trim( data.toString() ).split( opts.split );
		for ( i = 0; i < lines.length; i++ ) {
			console.log( convert( lines[ i ], N ) ); // eslint-disable-line no-console
		}
	}
}
106+
107+
main();
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
2+
Usage: stdlib-name2bucket [options] [<name>] --buckets=<num>
3+
4+
Options:
5+
6+
-h, --help Print this message.
7+
-V, --version Print the package version.
8+
--split sep Separator for stdin data. Default: '/\r?\n/'.
9+
--buckets num Number of buckets.
10+
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"string": [
3+
"split",
4+
"buckets"
5+
],
6+
"boolean": [
7+
"help",
8+
"version"
9+
],
10+
"alias": {
11+
"help": [
12+
"h"
13+
],
14+
"version": [
15+
"V"
16+
]
17+
}
18+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/**
2+
* @license Apache-2.0
3+
*
4+
* Copyright (c) 2022 The Stdlib Authors.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
'use strict';
20+
21+
var join = require( 'path' ).join;
22+
var pkgNames = require( '@stdlib/_tools/pkgs/names' ).sync;
23+
var rootDir = require( '@stdlib/_tools/utils/root-dir' );
24+
var chi2gof = require( '@stdlib/stats/chi2gof' );
25+
var countBy = require( '@stdlib/utils/count-by' );
26+
var objectValues = require( '@stdlib/utils/values' );
27+
var identity = require( '@stdlib/utils/identity-function' );
28+
var name2bucket = require( './../lib' );
29+
30+
// Build the path to the namespace directory whose package names will be bucketed:
var namespaceDir = join( rootDir(), 'lib', 'node_modules', '@stdlib', 'math', 'base', 'special' );

// List the package names found within that directory:
var pkgs = pkgNames({
	'dir': namespaceDir
});

// Assign each package name to one of 10 buckets:
var buckets = name2bucket( pkgs, 10 );
console.log( buckets.join( '\n' ) );

// Tally how many names landed in each bucket:
var tally = countBy( buckets, identity );
console.log( tally );

// Run a chi-square goodness-of-fit test of the tallies against a discrete uniform distribution on [0,9]:
var gof = chi2gof( objectValues( tally ), 'discrete-uniform', 0, 9 );
console.log( gof.toString() );

0 commit comments

Comments
 (0)