Skip to content

Commit 596469e

Browse files
committed
move removeAccents to normalization
1 parent 91ee177 commit 596469e

4 files changed

Lines changed: 79 additions & 71 deletions

File tree

src/vs/base/common/normalization.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,17 @@ function normalize(str: string, form: string, normalizedCache: LRUCache<string,
46 46

47 47
return res;
48 48
}
49+
50+
export const removeAccents: (str: string) => string = (function () {
51+
if (!canNormalize) {
52+
// no ES6 features...
53+
return function (str: string) { return str; };
54+
} else {
55+
// transform into NFD form and remove accents
56+
// see: https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript/37511463#37511463
57+
const regex = /[\u0300-\u036f]/g;
58+
return function (str: string) {
59+
return normalizeNFD(str).replace(regex, '');
60+
};
61+
}
62+
})();

src/vs/base/common/strings.ts

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
5 5

6 6
import { CharCode } from 'vs/base/common/charCode';
7 7
import { Constants } from 'vs/base/common/uint';
8-
import { canNormalize, normalizeNFD } from 'vs/base/common/normalization';
9 8

10 9
export function isFalsyOrWhitespace(str: string | undefined): boolean {
11 10
if (!str || typeof str !== 'string') {
@@ -853,21 +852,6 @@ export function removeAnsiEscapeCodes(str: string): string {
853 852
return str;
854 853
}
855 854

856-
export const removeAccents: (str: string) => string = (function () {
857-
if (!canNormalize) {
858-
// no ES6 features...
859-
return function (str: string) { return str; };
860-
} else {
861-
// transform into NFD form and remove accents
862-
// see: https://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript/37511463#37511463
863-
const regex = /[\u0300-\u036f]/g;
864-
return function (str: string) {
865-
return normalizeNFD(str).replace(regex, '');
866-
};
867-
}
868-
})();
869-
870-
871 855
// -- UTF-8 BOM
872 856

873 857
export const UTF8_BOM_CHARACTER = String.fromCharCode(CharCode.UTF8_BOM);

src/vs/base/test/common/normalization.test.ts

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*---------------------------------------------------------------------------------------------
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
* Licensed under the MIT License. See License.txt in the project root for license information.
4+
*--------------------------------------------------------------------------------------------*/
5+
6+
import * as assert from 'assert';
7+
import { removeAccents } from 'vs/base/common/normalization';
8+
9+
suite('Normalization', () => {
10+
11+
test('removeAccents', function () {
12+
assert.equal(removeAccents('joào'), 'joao');
13+
assert.equal(removeAccents('joáo'), 'joao');
14+
assert.equal(removeAccents('joâo'), 'joao');
15+
assert.equal(removeAccents('joäo'), 'joao');
16+
// assert.equal(removeAccents('joæo'), 'joao'); // not an accent: 'æ' is a ligature with no NFD decomposition
17+
assert.equal(removeAccents('joão'), 'joao');
18+
assert.equal(removeAccents('joåo'), 'joao');
19+
assert.equal(removeAccents('joåo'), 'joao');
20+
assert.equal(removeAccents('joāo'), 'joao');
21+
22+
assert.equal(removeAccents('fôo'), 'foo');
23+
assert.equal(removeAccents('föo'), 'foo');
24+
assert.equal(removeAccents('fòo'), 'foo');
25+
assert.equal(removeAccents('fóo'), 'foo');
26+
// assert.equal(removeAccents('fœo'), 'foo'); // not an accent: 'œ' is a ligature with no NFD decomposition
27+
// assert.equal(removeAccents('føo'), 'foo'); // not an accent: 'ø' has no NFD decomposition
28+
assert.equal(removeAccents('fōo'), 'foo');
29+
assert.equal(removeAccents('fõo'), 'foo');
30+
31+
assert.equal(removeAccents('andrè'), 'andre');
32+
assert.equal(removeAccents('andré'), 'andre');
33+
assert.equal(removeAccents('andrê'), 'andre');
34+
assert.equal(removeAccents('andrë'), 'andre');
35+
assert.equal(removeAccents('andrē'), 'andre');
36+
assert.equal(removeAccents('andrė'), 'andre');
37+
assert.equal(removeAccents('andrę'), 'andre');
38+
39+
assert.equal(removeAccents('hvîc'), 'hvic');
40+
assert.equal(removeAccents('hvïc'), 'hvic');
41+
assert.equal(removeAccents('hvíc'), 'hvic');
42+
assert.equal(removeAccents('hvīc'), 'hvic');
43+
assert.equal(removeAccents('hvįc'), 'hvic');
44+
assert.equal(removeAccents('hvìc'), 'hvic');
45+
46+
assert.equal(removeAccents('ûdo'), 'udo');
47+
assert.equal(removeAccents('üdo'), 'udo');
48+
assert.equal(removeAccents('ùdo'), 'udo');
49+
assert.equal(removeAccents('údo'), 'udo');
50+
assert.equal(removeAccents('ūdo'), 'udo');
51+
52+
assert.equal(removeAccents('heÿ'), 'hey');
53+
54+
// assert.equal(removeAccents('gruß'), 'grus'); // not an accent: 'ß' has no NFD decomposition
55+
assert.equal(removeAccents('gruś'), 'grus');
56+
assert.equal(removeAccents('gruš'), 'grus');
57+
58+
assert.equal(removeAccents('çool'), 'cool');
59+
assert.equal(removeAccents('ćool'), 'cool');
60+
assert.equal(removeAccents('čool'), 'cool');
61+
62+
assert.equal(removeAccents('ñice'), 'nice');
63+
assert.equal(removeAccents('ńice'), 'nice');
64+
});
65+
});

src/vs/base/test/common/strings.test.ts

Lines changed: 0 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -404,61 +404,6 @@ suite('Strings', () => {
404 404
assert.equal(strings.getNLines('foo', 0), '');
405 405
});
406 406

407-
test('removeAccents', function () {
408-
assert.equal(strings.removeAccents('joào'), 'joao');
409-
assert.equal(strings.removeAccents('joáo'), 'joao');
410-
assert.equal(strings.removeAccents('joâo'), 'joao');
411-
assert.equal(strings.removeAccents('joäo'), 'joao');
412-
// assert.equal(strings.removeAccents('joæo'), 'joao'); // not an accent
413-
assert.equal(strings.removeAccents('joão'), 'joao');
414-
assert.equal(strings.removeAccents('joåo'), 'joao');
415-
assert.equal(strings.removeAccents('joåo'), 'joao');
416-
assert.equal(strings.removeAccents('joāo'), 'joao');
417-
418-
assert.equal(strings.removeAccents('fôo'), 'foo');
419-
assert.equal(strings.removeAccents('föo'), 'foo');
420-
assert.equal(strings.removeAccents('fòo'), 'foo');
421-
assert.equal(strings.removeAccents('fóo'), 'foo');
422-
// assert.equal(strings.removeAccents('fœo'), 'foo');
423-
// assert.equal(strings.removeAccents('føo'), 'foo');
424-
assert.equal(strings.removeAccents('fōo'), 'foo');
425-
assert.equal(strings.removeAccents('fõo'), 'foo');
426-
427-
assert.equal(strings.removeAccents('andrè'), 'andre');
428-
assert.equal(strings.removeAccents('andré'), 'andre');
429-
assert.equal(strings.removeAccents('andrê'), 'andre');
430-
assert.equal(strings.removeAccents('andrë'), 'andre');
431-
assert.equal(strings.removeAccents('andrē'), 'andre');
432-
assert.equal(strings.removeAccents('andrė'), 'andre');
433-
assert.equal(strings.removeAccents('andrę'), 'andre');
434-
435-
assert.equal(strings.removeAccents('hvîc'), 'hvic');
436-
assert.equal(strings.removeAccents('hvïc'), 'hvic');
437-
assert.equal(strings.removeAccents('hvíc'), 'hvic');
438-
assert.equal(strings.removeAccents('hvīc'), 'hvic');
439-
assert.equal(strings.removeAccents('hvįc'), 'hvic');
440-
assert.equal(strings.removeAccents('hvìc'), 'hvic');
441-
442-
assert.equal(strings.removeAccents('ûdo'), 'udo');
443-
assert.equal(strings.removeAccents('üdo'), 'udo');
444-
assert.equal(strings.removeAccents('ùdo'), 'udo');
445-
assert.equal(strings.removeAccents('údo'), 'udo');
446-
assert.equal(strings.removeAccents('ūdo'), 'udo');
447-
448-
assert.equal(strings.removeAccents('heÿ'), 'hey');
449-
450-
// assert.equal(strings.removeAccents('gruß'), 'grus');
451-
assert.equal(strings.removeAccents('gruś'), 'grus');
452-
assert.equal(strings.removeAccents('gruš'), 'grus');
453-
454-
assert.equal(strings.removeAccents('çool'), 'cool');
455-
assert.equal(strings.removeAccents('ćool'), 'cool');
456-
assert.equal(strings.removeAccents('čool'), 'cool');
457-
458-
assert.equal(strings.removeAccents('ñice'), 'nice');
459-
assert.equal(strings.removeAccents('ńice'), 'nice');
460-
});
461-
462 407
test('encodeUTF8', function () {
463 408
function assertEncodeUTF8(str: string, expected: number[]): void {
464 409
const actual = strings.encodeUTF8(str);

0 commit comments

Comments
 (0)