-
Notifications
You must be signed in to change notification settings - Fork 28
Expand file tree
/
Copy pathutf.ts
More file actions
64 lines (62 loc) · 1.72 KB
/
utf.ts
File metadata and controls
64 lines (62 loc) · 1.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
/**
 * Computes how many bytes UTF-8 uses to encode a single Unicode code point.
 *
 * The result is picked by comparing the value against the upper bounds of the
 * 1-, 2- and 3-byte ranges; anything that is not below the last bound is
 * reported as 4 bytes.
 *
 * @param codePointValue - The numeric value of the code point.
 * @returns 1, 2, 3 or 4, depending on the range the value falls into.
 */
function numUtf8BytesForCodePoint(codePointValue: number): number {
  return codePointValue < 0x80
    ? 1 // below U+0080: single byte
    : codePointValue < 0x800
      ? 2 // below U+0800: two bytes
      : codePointValue < 0x10000
        ? 3 // below U+10000: three bytes
        : 4; // everything else: four bytes
}
/**
 * Measures the UTF-8 byte length of a prefix of `text`, where the prefix is
 * given as a count of string code units (the units counted by `text.length`).
 *
 * The text is walked one whole code point at a time and the walk stops once
 * at least `numCodeUnits` code units have been consumed. A limit that ends
 * inside a surrogate pair therefore still counts the entire pair; callers
 * should pass a limit that falls on a code point boundary.
 *
 * @param text - Text to examine.
 * @param numCodeUnits - Number of leading code units to measure. When
 *   omitted, the whole text is measured.
 * @returns The number of UTF-8 bytes for the code points that were walked.
 */
export function numCodeUnitsToNumUtf8Bytes(
  text: string,
  numCodeUnits?: number,
): number {
  // A zero-length prefix needs no bytes; handled up front because the loop
  // below only tests the limit after consuming a code point.
  if (numCodeUnits === 0) {
    return 0;
  }
  // With no limit supplied, use a bound the running count can never reach.
  const unitLimit = numCodeUnits === undefined ? Infinity : numCodeUnits;
  let byteTotal = 0;
  let offset = 0;
  while (offset < text.length) {
    // offset < text.length, so codePointAt cannot return undefined here.
    const codePoint = text.codePointAt(offset)!;
    byteTotal += numUtf8BytesForCodePoint(codePoint);
    // Code points above U+FFFF occupy two code units (a surrogate pair).
    offset += codePoint > 0xffff ? 2 : 1;
    if (offset >= unitLimit) {
      break;
    }
  }
  return byteTotal;
}
/**
 * Converts a UTF-8 byte count for a prefix of `text` into the number of
 * string code units (the units counted by `text.length`) in that prefix.
 *
 * The text is walked one whole code point at a time and the walk stops once
 * the accumulated UTF-8 size reaches at least `numUtf8Bytes`. A byte count
 * that ends inside a multi-byte sequence therefore still includes the whole
 * code point; callers should pass a count that falls on a code point boundary.
 *
 * @param text - Text to examine.
 * @param numUtf8Bytes - Number of leading UTF-8 bytes to cover. When omitted,
 *   the whole text is covered.
 * @returns The number of code units for the code points that were walked.
 */
export function numUtf8BytesToNumCodeUnits(
  text: string,
  numUtf8Bytes?: number,
): number {
  // A zero-byte prefix spans no code units; handled up front because the loop
  // below only tests the limit after consuming a code point.
  if (numUtf8Bytes === 0) {
    return 0;
  }
  // With no limit supplied, use a bound the running count can never reach.
  const byteLimit = numUtf8Bytes === undefined ? Infinity : numUtf8Bytes;
  let bytesSeen = 0;
  let offset = 0;
  while (offset < text.length) {
    // offset < text.length, so codePointAt cannot return undefined here.
    const codePoint = text.codePointAt(offset)!;
    bytesSeen += numUtf8BytesForCodePoint(codePoint);
    // Code points above U+FFFF occupy two code units (a surrogate pair).
    offset += codePoint > 0xffff ? 2 : 1;
    if (bytesSeen >= byteLimit) {
      break;
    }
  }
  // offset equals the number of code units consumed so far.
  return offset;
}