@@ -33,14 +33,27 @@ Encoding.readUTF8 = function (str) {
3333 var c2 = str . charCodeAt ( i ) & 0x3f ;
3434 var c = ( c1 << 6 ) + c2 ;
3535 arrs [ arrs . length ] = String . fromCharCode ( c ) ;
36- } else if ( charCode >= 0xe0 ) {
36+ } else if ( charCode >= 0xe0 && charCode < 0xf0 ) {
3737 var c1 = charCode & 0x0f ;
3838 i ++ ;
3939 var c2 = str . charCodeAt ( i ) & 0x3f ;
4040 i ++ ;
4141 var c3 = str . charCodeAt ( i ) & 0x3f ;
4242 var c = ( c1 << 12 ) + ( c2 << 6 ) + c3 ;
4343 arrs [ arrs . length ] = String . fromCharCode ( c ) ;
44+ } else if ( charCode >= 0xf0 ) {
45+ var c1 = charCode & 0x07 ;
46+ i ++ ;
47+ var c2 = str . charCodeAt ( i ) ;
48+ i ++ ;
49+ var c3 = str . charCodeAt ( i ) ;
50+ i ++ ;
51+ var c4 = str . charCodeAt ( i ) ;
52+ //var c = ((c1 & 0x06) << 18) + ((c2 & 0x3f)<< 12) + ((c3 & 0x3f) << 6) + (c4 & 0x3f) - 0x10000;
53+ var highCode = ( c1 & 0x07 << 8 ) + ( ( c2 & 0x3f ) << 2 ) + ( ( c3 & 0x30 ) >> 4 ) + 0xd800 - 0x0040 ;
54+ var lowCode = ( ( c3 & 0x0f ) << 6 ) + ( c4 & 0x3f ) + 0xdc00 ;
55+ arrs [ arrs . length ] = String . fromCharCode ( highCode ) ;
56+ arrs [ arrs . length ] = String . fromCharCode ( lowCode ) ;
4457 }
4558 }
4659 return arrs . join ( '' ) ;
@@ -66,16 +79,33 @@ Encoding.convert2UTF8 = function (str) {
6679 var charCode = str . charCodeAt ( i ) ;
6780 if ( charCode < 0x80 ) {
6881 arrs [ offset + i - startIdx ] = str . charAt ( i ) ;
82+ continue ;
6983 } else if ( charCode <= 0x07ff ) { //(charCode > 0xc0 && charCode < 0xe0) {
7084 var c1 = 0xc0 + ( ( charCode & 0x07c0 ) >> 6 ) ;
7185 var c2 = 0x80 + ( charCode & 0x003f ) ;
7286 arrs [ offset + i - startIdx ] = String . fromCharCode ( c1 ) + String . fromCharCode ( c2 ) ;
73- } else {
74- var c1 = 0xe0 + ( ( charCode & 0xf000 ) >> 12 ) ;
75- var c2 = 0x80 + ( ( charCode & 0x0fc0 ) >> 6 ) ;
76- var c3 = 0x80 + ( charCode & 0x003f ) ;
77- arrs [ offset + i - startIdx ] = String . fromCharCode ( c1 ) + String . fromCharCode ( c2 ) + String . fromCharCode ( c3 ) ;
87+ continue ;
88+ } else if ( charCode >= 0xd800 && charCode <= 0xdbff ) { // high-surrogate code point
89+ if ( i < str . length - 1 ) {
90+ var lowCode = str . charCodeAt ( i + 1 ) ;
91+ if ( lowCode >= 0xdc00 && lowCode <= 0xdfff ) { // low-surrogate code point
92+ i ++ ;
93+ // charCode = ((charCode & 0x03ff) << 10) + (lowCode & 0x03ff) + 0x10000;
94+ // utf8mb4: 0x10000 <= charCode <= 0x1fffff
95+ var highCode = charCode + 0x0040 ; // + 0x10000
96+ var c1 = 0xf0 + ( ( highCode & 0x0700 ) >> 16 ) ;
97+ var c2 = 0x80 + ( ( highCode & 0x00fc ) >> 2 ) ;
98+ var c3 = 0x80 + ( ( highCode & 0x0003 ) << 4 ) + ( ( lowCode & 0x03c0 ) >> 6 ) ;
99+ var c4 = 0x80 + ( lowCode & 0x003f ) ;
100+ arrs [ offset + i - startIdx ] = String . fromCharCode ( c1 ) + String . fromCharCode ( c2 ) + String . fromCharCode ( c3 ) + String . fromCharCode ( c4 ) ;
101+ continue ;
102+ }
103+ }
78104 }
105+ var c1 = 0xe0 + ( ( charCode & 0xf000 ) >> 12 ) ;
106+ var c2 = 0x80 + ( ( charCode & 0x0fc0 ) >> 6 ) ;
107+ var c3 = 0x80 + ( charCode & 0x003f ) ;
108+ arrs [ offset + i - startIdx ] = String . fromCharCode ( c1 ) + String . fromCharCode ( c2 ) + String . fromCharCode ( c3 ) ;
79109 }
80110 return arrs . join ( '' ) ;
81111} ;
0 commit comments