22 * Copyright (c) Microsoft Corporation. All rights reserved.
33 * Licensed under the MIT License. See License.txt in the project root for license information.
44 *--------------------------------------------------------------------------------------------*/
5- /* For reference:
6-
7- http://en.wikipedia.org/wiki/UTF-8
8- http://en.wikipedia.org/wiki/UTF-16
9- */
10-
115'use strict' ;
126
13- import bits = require( './bits' ) ;
14-
15- export var UTF8 = 'utf8' ;
16-
/**
 * Computes how many bytes are needed to encode a code point in UTF-8.
 *
 * The input is first coerced to an unsigned 32-bit integer. Values up to
 * 0x7fffffff are accepted, so the historical 5- and 6-byte forms are still
 * reported for values of 0x200000 and above.
 *
 * @param codePoint The code point to measure.
 * @returns The sequence length in bytes, between 1 and 6.
 * @throws Error when the coerced value is 0x80000000 or greater.
 */
function byteSizeInUTF8(codePoint: number): number {
	const value = codePoint >>> 0;

	// Anything that needs more than 31 bits has no representation.
	if (value >= 0x80000000) {
		throw new Error('Code point 0x' + bits.toHexString(value) + ' not encodable in UTF8.');
	}

	// Each threshold is the first value that no longer fits the shorter form.
	if (value < 0x80) {
		return 1;
	}
	if (value < 0x800) {
		return 2;
	}
	if (value < 0x10000) {
		return 3;
	}
	if (value < 0x200000) {
		return 4;
	}
	if (value < 0x4000000) {
		return 5;
	}
	return 6;
}
36-
/**
 * Writes the UTF-8 byte sequence for one code point into `buffer`.
 *
 * The sequence length comes from `byteSizeInUTF8`; any error it raises for an
 * unencodable value propagates to the caller.
 *
 * @param codePoint The code point to encode.
 * @param buffer Destination byte array.
 * @param pos Index in `buffer` that receives the first byte.
 * @returns The number of bytes written.
 */
function writeUTF8(codePoint: number, buffer: Uint8Array, pos: number): number {
	const width = byteSizeInUTF8(codePoint);

	// One byte: 0xxxxxxx
	if (width === 1) {
		buffer[pos] = codePoint;
		return 1;
	}

	// Multi-byte layouts:
	//   2: 110xxxxx 10xxxxxx
	//   3: 1110xxxx 10xxxxxx 10xxxxxx
	//   4: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	//   5: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
	//   6: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx

	// Lead byte: the length marker (0xfc shifted so the right number of high
	// bits stay set once masked to a byte) merged with the top payload bits.
	const lengthMarker = 0xfc << (6 - width);
	const topPayload = codePoint >>> (6 * (width - 1));
	buffer[pos] = (lengthMarker | topPayload) & 0xff;

	// Continuation bytes: 6 payload bits each, most significant group first.
	let shift = 6 * (width - 2);
	for (let offset = 1; offset < width; offset++, shift -= 6) {
		buffer[pos + offset] = (0x80 | (0x3f & (codePoint >>> shift))) & 0xff;
	}

	return width;
}
63-
/**
 * Encodes a string into a newly allocated buffer of UTF-8 bytes.
 *
 * The string is walked twice: once to validate surrogate pairs and total up
 * the byte length, and once to write the bytes. A lead surrogate
 * (0xd800-0xdbff) must be followed by a trail surrogate (0xdc00-0xdfff); the
 * pair is merged into a single code point. Any other code unit, including a
 * trail surrogate with no preceding lead, is encoded as its own code point.
 *
 * @param str The text to encode.
 * @param withBom When truthy, the bytes EF BB BF are written before the text.
 * @returns An ArrayBuffer sized exactly to the encoded output.
 * @throws Error ('Invalid char code') when a lead surrogate is not followed by
 *         a trail surrogate, including when it is the last code unit.
 */
export function encodeToUTF8(str: string, withBom?: boolean): ArrayBuffer {
	const unitCount = str.length;

	// Pass 1: validate pairs and measure the output.
	let byteTotal = 0;
	let index = 0;
	while (index < unitCount) {
		const unit = str.charCodeAt(index++);
		let scalar = unit;

		// The top six bits identify a lead surrogate.
		if ((unit & 0xfc00) === 0xd800) {
			const trail = str.charCodeAt(index++);

			// Reading past the end yields NaN, which masks to 0 and is rejected here too.
			if ((trail & 0xfc00) !== 0xdc00) {
				throw new Error('Invalid char code');
			}

			// Join the low 10 bits of each half, then add the 0x10000 offset.
			scalar = (((unit & 0x3ff) << 10) | (trail & 0x3ff)) + 0x10000;
		}

		byteTotal += byteSizeInUTF8(scalar);
	}

	const bom = withBom ? [0xEF, 0xBB, 0xBF] : [];
	const result = new ArrayBuffer(byteTotal + bom.length);
	const view = new Uint8Array(result);
	view.set(bom);
	let pos = bom.length;

	// Pass 2: emit the bytes. Pairs were already validated in pass 1.
	index = 0;
	while (index < unitCount) {
		const unit = str.charCodeAt(index++);
		let scalar = unit;

		if ((unit & 0xfc00) === 0xd800) {
			const trail = str.charCodeAt(index++);
			scalar = (((unit & 0x3ff) << 10) | (trail & 0x3ff)) + 0x10000;
		}

		pos += writeUTF8(scalar, view, pos);
	}

	return result;
}
121-
/**
 * Copies the UTF-16 code units of a string into a DataView in big-endian
 * byte order.
 *
 * @param str The text to write; each code unit takes two bytes.
 * @param bufferView Destination view.
 * @param offset Byte offset in `bufferView` of the first code unit.
 * @param count Number of bytes the caller has made available.
 * @returns The number of bytes written, i.e. twice `str.length`.
 * @throws Error when the string needs more than `count` bytes; nothing is
 *         written in that case.
 */
export function encodeToUTF16(str: string, bufferView: DataView, offset: number, count: number): number {
	const unitCount = str.length;
	const requiredBytes = unitCount * 2;

	// Refuse up front so a string that is too long never produces a partial write.
	if (requiredBytes > count) {
		throw Error('Unable to encode string to UTF16. Need ' + requiredBytes + ' bytes, but only have ' + count + ' bytes.');
	}

	const littleEndian = false;
	let index = 0;
	while (index < unitCount) {
		bufferView.setUint16(offset + index * 2, str.charCodeAt(index), littleEndian);
		index++;
	}

	return requiredBytes;
}
7+ export const UTF8 = 'utf8' ;
1348
135- export var SUPPORTED_ENCODINGS :{ [ encoding :string ] :{ labelLong :string ; labelShort :string ; order :number ; } } = {
9+ export const SUPPORTED_ENCODINGS :{ [ encoding :string ] :{ labelLong :string ; labelShort :string ; order :number ; } } = {
13610 utf8 : {
13711 labelLong : 'UTF-8' ,
13812 labelShort : 'UTF-8' ,
@@ -348,4 +222,4 @@ export var SUPPORTED_ENCODINGS:{[encoding:string]:{ labelLong:string; labelShort
348222 labelShort : 'GB 2312' ,
349223 order : 43
350224 }
351- } ;
225+ } ;
0 commit comments