Skip to content

Commit 8a29a6f

Browse files
committed
Use localeCompare instead of simple Unicode code point comparison
Bug: https://phabricator.wikimedia.org/T407180
Change-Id: I5bcda3b5f0dd732ac0612671987fa981c03a9bc8
1 parent 3c1de83 commit 8a29a6f

File tree

2 files changed

+74
-92
lines changed

2 files changed

+74
-92
lines changed

src/index.js

Lines changed: 26 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
var languageData = require( '../data/language-data.json' );
1+
const languageData = require( '../data/language-data.json' );
22

33
/**
44
* Utility functions for querying language data.
@@ -43,7 +43,7 @@ function getLanguages() {
4343
* @return {string}
4444
*/
4545
function getScript( language ) {
46-
var target = isRedirect( language );
46+
const target = isRedirect( language );
4747
if ( target ) {
4848
return getScript( target );
4949
}
@@ -61,7 +61,7 @@ function getScript( language ) {
6161
* @return {string[]} 'UNKNOWN'
6262
*/
6363
function getRegions( language ) {
64-
var target = isRedirect( language );
64+
const target = isRedirect( language );
6565
if ( target ) {
6666
return getRegions( target );
6767
}
@@ -75,7 +75,7 @@ function getRegions( language ) {
7575
* @return {string}
7676
*/
7777
function getAutonym( language ) {
78-
var target = isRedirect( language );
78+
const target = isRedirect( language );
7979
if ( target ) {
8080
return getAutonym( target );
8181
}
@@ -88,9 +88,8 @@ function getAutonym( language ) {
8888
* @return {Array}
8989
*/
9090
function getAutonyms() {
91-
var language,
92-
autonymsByCode = {};
93-
for ( language in languageData.languages ) {
91+
const autonymsByCode = {};
92+
for ( const language in languageData.languages ) {
9493
if ( isRedirect( language ) ) {
9594
continue;
9695
}
@@ -106,13 +105,12 @@ function getAutonyms() {
106105
* @return {string[]} language codes
107106
*/
108107
function getLanguagesInScripts( scripts ) {
109-
var language, i,
110-
languagesInScripts = [];
111-
for ( language in languageData.languages ) {
108+
const languagesInScripts = [];
109+
for ( const language in languageData.languages ) {
112110
if ( isRedirect( language ) ) {
113111
continue;
114112
}
115-
for ( i = 0; i < scripts.length; i++ ) {
113+
for ( let i = 0; i < scripts.length; i++ ) {
116114
if ( scripts[ i ] === getScript( language ) ) {
117115
languagesInScripts.push( language );
118116
break;
@@ -140,8 +138,7 @@ function getLanguagesInScript( script ) {
140138
* @return {string} script group name
141139
*/
142140
function getGroupOfScript( script ) {
143-
var scriptGroup;
144-
for ( scriptGroup in languageData.scriptgroups ) {
141+
for ( const scriptGroup in languageData.scriptgroups ) {
145142
if ( languageData.scriptgroups[ scriptGroup ].includes( script ) ) {
146143
return scriptGroup;
147144
}
@@ -166,13 +163,11 @@ function getScriptGroupOfLanguage( language ) {
166163
* @return {string[]} Array of language codes
167164
*/
168165
function getLanguagesByScriptGroup( languages ) {
169-
var languagesByScriptGroup = {},
170-
language, languageIndex, resolvedRedirect, langScriptGroup;
166+
const languagesByScriptGroup = {};
171167

172-
for ( languageIndex = 0; languageIndex < languages.length; languageIndex++ ) {
173-
language = languages[ languageIndex ];
174-
resolvedRedirect = isRedirect( language ) || language;
175-
langScriptGroup = getScriptGroupOfLanguage( resolvedRedirect );
168+
for ( const language of languages ) {
169+
const resolvedRedirect = isRedirect( language ) || language;
170+
const langScriptGroup = getScriptGroupOfLanguage( resolvedRedirect );
176171
if ( !languagesByScriptGroup[ langScriptGroup ] ) {
177172
languagesByScriptGroup[ langScriptGroup ] = [];
178173
}
@@ -189,15 +184,14 @@ function getLanguagesByScriptGroup( languages ) {
189184
* @return {Object}
190185
*/
191186
function getLanguagesByScriptGroupInRegions( regions ) {
192-
var language, i, scriptGroup,
193-
languagesByScriptGroupInRegions = {};
194-
for ( language in languageData.languages ) {
187+
const languagesByScriptGroupInRegions = {};
188+
for ( const language in languageData.languages ) {
195189
if ( isRedirect( language ) ) {
196190
continue;
197191
}
198-
for ( i = 0; i < regions.length; i++ ) {
199-
if ( getRegions( language ).includes( regions[ i ] ) ) {
200-
scriptGroup = getScriptGroupOfLanguage( language );
192+
for ( const region of regions ) {
193+
if ( getRegions( language ).includes( region ) ) {
194+
const scriptGroup = getScriptGroupOfLanguage( language );
201195
if ( languagesByScriptGroupInRegions[ scriptGroup ] === undefined ) {
202196
languagesByScriptGroupInRegions[ scriptGroup ] = [];
203197
}
@@ -227,14 +221,12 @@ function getLanguagesByScriptGroupInRegion( region ) {
227221
* @return {string[]} Array of language codes
228222
*/
229223
function sortByScriptGroup( languages ) {
230-
var groupedLanguages, scriptGroups, i,
231-
allLanguages = [];
224+
const groupedLanguages = getLanguagesByScriptGroup( languages );
225+
const scriptGroups = Object.keys( groupedLanguages ).sort();
226+
let allLanguages = [];
232227

233-
groupedLanguages = getLanguagesByScriptGroup( languages );
234-
scriptGroups = Object.keys( groupedLanguages ).sort();
235-
236-
for ( i = 0; i < scriptGroups.length; i++ ) {
237-
allLanguages = allLanguages.concat( groupedLanguages[ scriptGroups[ i ] ] );
228+
for ( const scriptGroup of scriptGroups ) {
229+
allLanguages = allLanguages.concat( groupedLanguages[ scriptGroup ] );
238230
}
239231

240232
return allLanguages;
@@ -249,9 +241,9 @@ function sortByScriptGroup( languages ) {
249241
* @return {number}
250242
*/
251243
function sortByAutonym( a, b ) {
252-
var autonymA = getAutonym( a ) || a,
244+
const autonymA = getAutonym( a ) || a,
253245
autonymB = getAutonym( b ) || b;
254-
return ( autonymA.toLowerCase() < autonymB.toLowerCase() ) ? -1 : 1;
246+
return autonymA.localeCompare( autonymB );
255247
}
256248

257249
/**

tests/js/index.js

Lines changed: 48 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,15 @@
1-
var languageData = require( __dirname + '/../../src/index' ),
1+
const languageData = require( '../../src/index.js' ),
22
assert = require( 'assert' );
33

4-
describe( 'languagedata', function () {
5-
var orphanScripts, badRedirects, invalidCodes,
6-
doubleRedirects, doubleAutonyms, languagesWithoutAutonym;
4+
describe( 'languagedata', () => {
75
/*
86
* Runs over all script codes mentioned in langdb and checks whether
97
* they belong to the 'Other' group.
108
*/
11-
orphanScripts = function () {
12-
var language, script,
13-
result = [];
14-
for ( language in languageData.getLanguages() ) {
15-
script = languageData.getScript( language );
9+
const orphanScripts = () => {
10+
const result = [];
11+
for ( const language in languageData.getLanguages() ) {
12+
const script = languageData.getScript( language );
1613
if ( languageData.getGroupOfScript( script ) === 'Other' ) {
1714
result.push( script );
1815
}
@@ -22,11 +19,10 @@ describe( 'languagedata', function () {
2219
/*
2320
* Runs over all languages and checks that all redirects have a valid target.
2421
*/
25-
badRedirects = function () {
26-
var language, target,
27-
result = [];
28-
for ( language in languageData.getLanguages() ) {
29-
target = languageData.isRedirect( language );
22+
const badRedirects = () => {
23+
const result = [];
24+
for ( const language in languageData.getLanguages() ) {
25+
const target = languageData.isRedirect( language );
3026
if ( target && !languageData.getLanguages()[ target ] ) {
3127
result.push( language );
3228
}
@@ -36,12 +32,11 @@ describe( 'languagedata', function () {
3632
/*
3733
* Runs over all languages and checks that all redirects have a valid target.
3834
*/
39-
invalidCodes = function () {
40-
var languageCode,
41-
invalidCharsRe = /[^0-9a-z-]/,
42-
result = [];
35+
const invalidCodes = () => {
36+
const invalidCharsRe = /[^0-9a-z-]/;
37+
const result = [];
4338

44-
for ( languageCode in languageData.getLanguages() ) {
39+
for ( const languageCode in languageData.getLanguages() ) {
4540
if ( languageCode.match( invalidCharsRe ) ) {
4641
result.push( languageCode );
4742
}
@@ -53,11 +48,10 @@ describe( 'languagedata', function () {
5348
* Runs over all languages and checks that all redirects point to a language.
5449
* There's no reason to have double redirects.
5550
*/
56-
doubleRedirects = function () {
57-
var language, target,
58-
result = [];
59-
for ( language in languageData.getLanguages() ) {
60-
target = languageData.isRedirect( language );
51+
const doubleRedirects = () => {
52+
const result = [];
53+
for ( const language in languageData.getLanguages() ) {
54+
const target = languageData.isRedirect( language );
6155
if ( target && languageData.isRedirect( target ) ) {
6256
result.push( language );
6357
}
@@ -67,19 +61,18 @@ describe( 'languagedata', function () {
6761
/*
6862
* Runs over all languages and checks that all autonyms are unique.
6963
*/
70-
doubleAutonyms = function () {
71-
var language, autonym,
72-
autonyms = [],
73-
duplicateAutonyms = [];
64+
const doubleAutonyms = () => {
65+
const autonyms = [];
66+
const duplicateAutonyms = [];
7467

75-
for ( language in languageData.getLanguages() ) {
68+
for ( const language in languageData.getLanguages() ) {
7669
if ( languageData.isRedirect( language ) ) {
7770
continue;
7871
}
7972

80-
autonym = languageData.getAutonym( language );
73+
const autonym = languageData.getAutonym( language );
8174

82-
if ( autonyms.indexOf( autonym ) > -1 ) {
75+
if ( autonyms.includes( autonym ) ) {
8376
duplicateAutonyms.push( language );
8477
}
8578

@@ -92,26 +85,24 @@ describe( 'languagedata', function () {
9285
* Runs over all language codes mentioned in langdb and checks whether
9386
* they have something that looks like an autonym.
9487
*/
95-
languagesWithoutAutonym = function () {
96-
var language,
97-
result = [];
98-
for ( language in languageData.getLanguages() ) {
88+
const languagesWithoutAutonym = () => {
89+
const result = [];
90+
for ( const language in languageData.getLanguages() ) {
9991
if ( typeof languageData.getAutonym( language ) !== 'string' ) {
10092
result.push( language );
10193
}
10294
}
10395
return result;
10496
};
10597

106-
it( 'language tags', function () {
98+
it( 'language tags', () => {
10799
assert.ok( languageData.isKnown( 'ar' ), 'Language is unknown' );
108100
assert.ok( !languageData.isKnown( 'unknownLanguageCode!' ), 'Language is known' );
109101
assert.deepEqual( invalidCodes(), [], 'All language codes have no invalid characters.' );
110102
} );
111103

112-
it( 'autonyms', function () {
113-
var autonyms, chineseScriptLanguages, i,
114-
languagesWithParentheses = [];
104+
it( 'autonyms', () => {
105+
const languagesWithParentheses = [];
115106
// Add a language in run time.
116107
// This is done early to make sure that it doesn't break other functions.
117108
languageData.addLanguage( 'qqq', {
@@ -120,11 +111,11 @@ describe( 'languagedata', function () {
120111
autonym: 'Language documentation'
121112
} );
122113
assert.ok( languageData.getAutonym( 'qqq' ), 'Language documentation', 'Language qqq was added with the correct autonym' );
123-
autonyms = languageData.getAutonyms();
124-
assert.strictEqual( autonyms[ 'zu' ], 'isiZulu', 'Correct autonym is returned for Zulu using getAutonyms().' );
114+
const autonyms = languageData.getAutonyms();
115+
assert.strictEqual( autonyms.zu, 'isiZulu', 'Correct autonym is returned for Zulu using getAutonyms().' );
125116
assert.deepEqual( doubleAutonyms(), [], 'All languages have distinct autonyms.' );
126117
assert.strictEqual( autonyms[ 'pa-guru' ], undefined, 'Language "pa-guru" is not listed in autonyms, because it is a redirect' );
127-
assert.strictEqual( autonyms[ 'pa' ], 'ਪੰਜਾਬੀ', 'Language "pa" has the correct autonym' );
118+
assert.strictEqual( autonyms.pa, 'ਪੰਜਾਬੀ', 'Language "pa" has the correct autonym' );
128119
assert.deepEqual( languagesWithoutAutonym(), [], 'All languages have autonyms.' );
129120
assert.strictEqual( languageData.getAutonym( 'pa' ), 'ਪੰਜਾਬੀ', 'Correct autonym of the Punjabi language was selected using code pa.' );
130121
assert.strictEqual( languageData.getAutonym( 'pa-guru' ), 'ਪੰਜਾਬੀ', 'Correct autonym of the Punjabi language was selected using code pa-guru.' );
@@ -133,17 +124,16 @@ describe( 'languagedata', function () {
133124
'gn', 'de', 'hu', 'fi'
134125
], 'Languages are correctly sorted by autonym' );
135126

136-
chineseScriptLanguages = languageData.getLanguagesInScripts( [ 'Hans', 'Hant', 'Hani' ] );
137-
for ( i = 0; i < chineseScriptLanguages.length; ++i ) {
138-
if ( languageData.getAutonym( chineseScriptLanguages[i] ).match( /[()]/ ) ) {
139-
languagesWithParentheses.push( chineseScriptLanguages[i] );
127+
const chineseScriptLanguages = languageData.getLanguagesInScripts( [ 'Hans', 'Hant', 'Hani' ] );
128+
for ( const lang of chineseScriptLanguages ) {
129+
if ( languageData.getAutonym( lang ).match( /[()]/ ) ) {
130+
languagesWithParentheses.push( lang );
140131
}
141132
}
142133
assert.deepEqual( languagesWithParentheses, [], 'Chinese script languages\' autonyms don\'t have Western parentheses' );
143134
} );
144-
it( 'regions and groups', function () {
145-
var languagesAM,
146-
regionGroups = languageData.getRegionGroups();
135+
it( 'regions and groups', () => {
136+
const regionGroups = languageData.getRegionGroups();
147137

148138
// This test assumes that we don't want any scripts to be in the 'Other'
149139
// group. Actually, this may become wrong some day.
@@ -160,22 +150,22 @@ describe( 'languagedata', function () {
160150
'An invalid country has no languages and returns an empty array'
161151
);
162152

163-
languagesAM = [ 'atj', 'chr', 'chy', 'cr', 'en', 'es', 'fr', 'gn', 'haw', 'ike-cans', 'ik', 'kl', 'nl', 'pt', 'qu', 'srn', 'yi' ];
153+
const languagesAM = [ 'atj', 'chr', 'chy', 'cr', 'en', 'es', 'fr', 'gn', 'haw', 'ike-cans', 'ik', 'kl', 'nl', 'pt', 'qu', 'srn', 'yi' ];
164154
assert.deepEqual(
165155
languageData.sortByScriptGroup( languagesAM.sort( languageData.sortByAutonym ) ),
166-
[ 'atj', 'gn', 'en', 'es', 'fr', 'haw', 'ik', 'kl', 'nl', 'pt', 'qu', 'srn', 'chy', 'yi', 'ike-cans', 'cr', 'chr' ],
156+
[ 'atj', 'gn', 'en', 'es', 'fr', 'haw', 'ik', 'kl', 'nl', 'pt', 'qu', 'srn', 'chy', 'yi', 'chr', 'ike-cans', 'cr' ],
167157
'languages in region AM are ordered correctly by script group'
168158
);
169159

170-
for ( var language in languageData.getLanguages() ) {
171-
var regions = languageData.getRegions( language );
160+
for ( const language in languageData.getLanguages() ) {
161+
const regions = languageData.getRegions( language );
172162

173-
for ( var region of regions ) {
174-
assert.ok( regionGroups[region], `Language ${language} has an invalid region ${region}` );
163+
for ( const region of regions ) {
164+
assert.ok( regionGroups[ region ], `Language ${language} has an invalid region ${region}` );
175165
}
176166
}
177167
} );
178-
it( 'scripts', function () {
168+
it( 'scripts', () => {
179169
// This test assumes that we don't want any scripts to be in the 'Other'
180170
// group. Actually, this may become wrong some day.
181171
assert.deepEqual( orphanScripts(), [], 'All scripts belong to script groups.' );
@@ -187,14 +177,14 @@ describe( 'languagedata', function () {
187177
assert.strictEqual( languageData.getGroupOfScript( 'Beng' ), 'SouthAsian', 'Bengali script belongs to the SouthAsian group.' );
188178
assert.strictEqual( languageData.getScriptGroupOfLanguage( 'iu' ), 'NativeAmerican', 'The script of the Inupiaq language belongs to the NativeAmerican group.' );
189179
} );
190-
it( 'redirects', function () {
180+
it( 'redirects', () => {
191181
assert.strictEqual( languageData.isRedirect( 'sr-ec' ), 'sr-cyrl', '"sr-ec" is a redirect to "sr-cyrl"' );
192182
assert.deepEqual( badRedirects(), [], 'All redirects have valid targets.' );
193183
assert.deepEqual( doubleRedirects(), [], 'There are no double redirects.' );
194184
assert.strictEqual( languageData.getScript( 'no-such-language' ), 'Zyyy', 'A script for an unknown language is Zyyy - undetermined' );
195185
assert.strictEqual( languageData.getScript( 'ii' ), 'Yiii', 'Correct script of the Yi language was selected' );
196186
} );
197-
it( 'directionality', function () {
187+
it( 'directionality', () => {
198188
assert.strictEqual( languageData.isRtl( 'te' ), false, 'Telugu language is not RTL' );
199189
assert.strictEqual( languageData.isRtl( 'dv' ), true, 'Divehi language is RTL' );
200190
assert.strictEqual( languageData.getDir( 'mzn' ), 'rtl', 'Mazandarani language is RTL' );

0 commit comments

Comments
 (0)