File tree Expand file tree Collapse file tree 7 files changed +289
-0
lines changed
lib/node_modules/@stdlib/regexp/utf16-unpaired-surrogate Expand file tree Collapse file tree 7 files changed +289
-0
lines changed Original file line number Diff line number Diff line change 1+ # UTF-16 Unpaired Surrogate
2+
3+ > [Regular expression][mdn-regexp] to match an unpaired [UTF-16][utf-16] surrogate.
4+
5+
6+ <section class =" usage " >
7+
8+ ## Usage
9+
10+ ``` javascript
11+ var RE_UTF16_UNPAIRED_SURROGATE = require( '@stdlib/regexp/utf16-unpaired-surrogate' );
12+ ```
13+
14+ #### RE_UTF16_UNPAIRED_SURROGATE
15+
16+ [Regular expression][mdn-regexp] to match an unpaired [UTF-16][utf-16] surrogate.
17+
18+ ``` javascript
19+ var bool = RE_UTF16_UNPAIRED_SURROGATE.test( 'abc\uD800def' );
20+ // returns true
21+ ```
22+
23+ </section >
24+
25+ <!-- /.usage -->
26+
27+
28+ <section class =" examples " >
29+
30+ ## Examples
31+
32+ ``` javascript
33+ var RE_UTF16_UNPAIRED_SURROGATE = require( '@stdlib/regexp/utf16-unpaired-surrogate' );
34+
35+ var bool = RE_UTF16_UNPAIRED_SURROGATE.test( '\uD800' );
36+ // returns true
37+
38+ bool = RE_UTF16_UNPAIRED_SURROGATE.test( '\uDC00' );
39+ // returns true
40+
41+ bool = RE_UTF16_UNPAIRED_SURROGATE.test( 'abc' );
42+ // returns false
43+ ```
44+
45+ </section >
46+
47+ <!-- /.examples -->
48+
49+
50+ <section class =" links " >
51+
52+ [mdn-regexp]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
53+ [utf-16]: https://en.wikipedia.org/wiki/UTF-16
54+
55+ </section >
56+
57+ <!-- /.links -->
Original file line number Diff line number Diff line change 1+ 'use strict' ;
2+
3+ // MODULES //
4+
5+ var bench = require ( '@stdlib/bench' ) ;
6+ var isBoolean = require ( '@stdlib/assert/is-boolean' ) . isPrimitive ;
7+ var fromCodePoint = require ( '@stdlib/string/from-code-point' ) ;
8+ var pkg = require ( './../package.json' ) . name ;
9+ var RE_UTF16_UNPAIRED_SURROGATE = require ( './../lib' ) ; // eslint-disable-line id-length
10+
11+
12+ // MAIN //
13+
/*
* Times repeated `test` invocations of the regular expression against short strings which vary per iteration and contain no surrogates.
*/
bench( pkg, function benchmark( b ) {
	var result;
	var input;
	var k;

	b.tic();
	k = 0;
	while ( k < b.iterations ) {
		// Vary one lowercase ASCII letter per iteration so that the input is not constant:
		input = 'beep boop\r\n' + fromCodePoint( 97 + ( k % 26 ) ) + '\r\nfoo bar';
		result = RE_UTF16_UNPAIRED_SURROGATE.test( input );
		if ( !isBoolean( result ) ) {
			b.fail( 'should return a boolean' );
		}
		k++;
	}
	b.toc();

	// Sanity check the last computed result outside of the timed region:
	if ( !isBoolean( result ) ) {
		b.fail( 'should return a boolean' );
	}
	b.pass( 'benchmark finished' );
	b.end();
});
Original file line number Diff line number Diff line change 1+
2+ {{alias}}
3+ Regular expression to match an unpaired UTF-16 surrogate.
4+
5+ Examples
6+ --------
7+ > var bool = {{alias}}.test( 'abc' )
8+ false
9+ > bool = {{alias}}.test( '\uD800' )
10+ true
11+
12+ See Also
13+ --------
14+
Original file line number Diff line number Diff line change 1+ 'use strict' ;
2+
// Load the regular expression (the long identifier exceeds the lint length limit, hence the inline ESLint directive):
var RE_UTF16_UNPAIRED_SURROGATE = require( './../lib' ); // eslint-disable-line id-length

// A lone high surrogate is unpaired:
console.log( RE_UTF16_UNPAIRED_SURROGATE.test( '\uD800' ) );
// => true

// A lone low surrogate is unpaired:
console.log( RE_UTF16_UNPAIRED_SURROGATE.test( '\uDC00' ) );
// => true

// A string without any surrogates does not match:
console.log( RE_UTF16_UNPAIRED_SURROGATE.test( 'abc' ) );
// => false
Original file line number Diff line number Diff line change 1+ 'use strict' ;
2+
3+ /**
4+ * Regular expression to match an unpaired UTF-16 surrogate.
5+ *
6+ * @module @stdlib/regexp/utf16-unpaired-surrogate
7+ * @type {RegExp}
8+ *
9+ * @example
10+ * var RE_UTF16_UNPAIRED_SURROGATE = require( '@stdlib/regexp/utf16-unpaired-surrogate' );
11+ *
12+ * var bool = RE_UTF16_UNPAIRED_SURROGATE.test( '\uD800' );
13+ * // returns true
14+ *
15+ * bool = RE_UTF16_UNPAIRED_SURROGATE.test( '\uDC00' );
16+ * // returns true
17+ *
18+ * bool = RE_UTF16_UNPAIRED_SURROGATE.test( 'abc' );
19+ * // returns false
20+ */
21+
22+
23+ // MAIN //
24+
/**
* Matches an unpaired UTF-16 surrogate.
*
* Regular expression: `/(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])/`
*
* -   `(?:[^\uD800-\uDBFF]|^)`
*     -   non-capturing group which matches either a single code unit which is not a high surrogate or the start of the input
* -   `[\uDC00-\uDFFF]`
*     -   match a low surrogate
* -   `|`
*     -   OR
* -   `[\uD800-\uDBFF]`
*     -   match a high surrogate
* -   `(?![\uDC00-\uDFFF])`
*     -   but only accept the previous match if not followed by a low surrogate
*
* ## Notes
*
* -   The first alternative consumes the code unit preceding an unpaired low surrogate (when one exists), so the matched substring may be two code units long.
* -   The expression has no flags; in particular, it is not global, so `test` does not depend on `lastIndex`.
*
* @constant
* @type {RegExp}
* @default /(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])/
*/
var RE_UTF16_UNPAIRED_SURROGATE = /(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])/; // eslint-disable-line id-length
47+
48+
49+ // EXPORTS //
50+
51+ module . exports = RE_UTF16_UNPAIRED_SURROGATE ;
Original file line number Diff line number Diff line change 1+ {
2+ "name" : " @stdlib/regexp/utf16-unpaired-surrogate" ,
3+ "version" : " 0.0.0" ,
4+ "description" : " Regular expression to match an unpaired UTF-16 surrogate." ,
5+ "author" : {
6+ "name" : " The Stdlib Authors" ,
7+ "url" : " https://github.com/stdlib-js/stdlib/graphs/contributors"
8+ },
9+ "contributors" : [
10+ {
11+ "name" : " The Stdlib Authors" ,
12+ "url" : " https://github.com/stdlib-js/stdlib/graphs/contributors"
13+ }
14+ ],
15+ "scripts" : {},
16+ "main" : " ./lib" ,
17+ "repository" : {
18+ "type" : " git" ,
19+ "url" : " git://github.com/stdlib-js/stdlib.git"
20+ },
21+ "homepage" : " https://github.com/stdlib-js/stdlib" ,
22+ "keywords" : [
23+ " stdlib" ,
24+ " regex" ,
25+ " regexp" ,
26+ " re" ,
27+ " utf-16" ,
28+ " utf16" ,
29+ " unicode" ,
30+ " surrogate" ,
31+ " high" ,
32+ " low" ,
33+ " pair" ,
34+ " regular" ,
35+ " expression" ,
36+ " capture" ,
37+ " match"
38+ ],
39+ "bugs" : {
40+ "url" : " https://github.com/stdlib-js/stdlib/issues"
41+ },
42+ "dependencies" : {},
43+ "devDependencies" : {},
44+ "engines" : {
45+ "node" : " >=0.10.0" ,
46+ "npm" : " >2.7.0"
47+ },
48+ "license" : " Apache-2.0"
49+ }
Original file line number Diff line number Diff line change 1+ 'use strict' ;
2+
3+ // MODULES //
4+
5+ var tape = require ( 'tape' ) ;
6+ var RE = require ( './../lib' ) ;
7+
8+
9+ // TESTS //
10+
// Verifies that the package's main export is a `RegExp` instance.
tape( 'main export is a regular expression', function test( t ) {
	var isRegExp;

	t.ok( true, __filename );

	isRegExp = RE instanceof RegExp;
	t.strictEqual( isRegExp, true, 'main export is a regular expression' );

	t.end();
});
16+
// Verifies that lone surrogates are detected in isolation, when embedded in ordinary text, and when adjacent to well-formed surrogate pairs.
tape( 'the regular expression matches an unpaired UTF-16 surrogate', function test( t ) {
	var values;
	var i;

	values = [
		'\uD800',
		'\uD801',
		'\uD802',
		'\uDBFF',
		'\uDC00',
		'\uDC01',
		'\uDFFE',
		'\uDFFF',
		'abc\uD800abc',
		'abc\uDFFFabc',

		// Unpaired high surrogate immediately before a well-formed pair:
		'\uD800\uD800\uDC00',

		// Unpaired low surrogate immediately after a well-formed pair:
		'\uD800\uDC00\uDC00',

		// Low surrogate followed by a high surrogate (reversed order, so both are unpaired):
		'\uDC00\uD800'
	];

	for ( i = 0; i < values.length; i++ ) {
		t.strictEqual( RE.test( values[ i ] ), true, 'matches when provided ' + values[ i ] );
	}
	t.end();
});
39+
// Verifies that well-formed surrogate pairs (a high surrogate immediately followed by a low surrogate) are not reported as unpaired.
tape( 'the regular expression does not match surrogate pairs', function test( t ) {
	var pairs = [
		'\uD800\uDC00',
		'\uD801\uDC01',
		'\uDBFF\uDFFF',
		'abc\uD800\uDC00abc'
	];
	var str;
	var k;

	k = 0;
	while ( k < pairs.length ) {
		str = pairs[ k ];
		t.strictEqual( RE.test( str ), false, 'does not match when provided ' + str );
		k++;
	}
	t.end();
});
56+
// Verifies that strings containing only non-surrogate code units never match.
tape( 'the regular expression does not match non-surrogates', function test( t ) {
	var strings = [
		'a',
		'b',
		'c',
		'abc',
		'defgihjk'
	];
	var str;
	var k;

	k = 0;
	while ( k < strings.length ) {
		str = strings[ k ];
		t.strictEqual( RE.test( str ), false, 'does not match when provided ' + str );
		k++;
	}
	t.end();
});
You can’t perform that action at this time.
0 commit comments