forked from nodejs/node
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstring_decoder.js
More file actions
183 lines (167 loc) · 5.38 KB
/
Copy pathstring_decoder.js
File metadata and controls
183 lines (167 loc) · 5.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
'use strict';
const Buffer = require('buffer').Buffer;
// Maps an encoding name to the canonical lowercase name used by the decoder,
// defaulting to 'utf8' when no name is given. Throws an Error for names that
// are neither listed below nor accepted by `Buffer.isEncoding`.
// `Buffer.isEncoding` is looked up on every call rather than cached, as some
// modules monkey-patch it to support additional encodings.
function normalizeEncoding(enc) {
  if (!enc) return 'utf8';
  let name = enc;
  // Pass 0 tests the name exactly as given; if nothing matches, pass 1 tests
  // its lowercased string form and is the last attempt.
  for (let pass = 0; ; ++pass) {
    if (name === 'utf8' || name === 'utf-8')
      return 'utf8';
    if (name === 'ucs2' || name === 'utf16le' ||
        name === 'ucs-2' || name === 'utf-16le')
      return 'utf16le';
    if (name === 'base64' || name === 'ascii' || name === 'latin1' ||
        name === 'binary' || name === 'hex')
      return name;
    if (pass > 0) {
      if (Buffer.isEncoding(name))
        return name;
      throw new Error('Unknown encoding: ' + name);
    }
    name = ('' + name).toLowerCase();
  }
}
// StringDecoder provides an interface for efficiently splitting a series of
// buffers into a series of JS strings without breaking apart multi-byte
// characters.
exports.StringDecoder = StringDecoder;
// Constructs a decoder for the given encoding (normalized through
// normalizeEncoding). Multi-byte encodings get a small carry-over buffer
// (lastChar) plus a byte count (partial) describing how much of it is in use;
// every other encoding has write/end replaced by simple pass-through versions
// and carries no state.
function StringDecoder(encoding) {
  const enc = normalizeEncoding(encoding);
  this.encoding = enc;

  // Size in bytes of the carry-over buffer.
  let carrySize;
  if (enc === 'utf16le') {
    this.complete = utf16Complete;
    this.flush = simpleFlush;
    carrySize = 4;
  } else if (enc === 'utf8') {
    // Uses the prototype's default complete/flush implementations.
    carrySize = 4;
  } else if (enc === 'base64') {
    this.complete = base64Complete;
    this.flush = simpleFlush;
    carrySize = 3;
  } else {
    this.write = simpleWrite;
    this.end = simpleEnd;
    return;
  }

  this.partial = 0;
  this.lastChar = Buffer.allocUnsafe(carrySize);
}
// Decodes buf and returns the resulting string. Bytes of a trailing
// incomplete character are held back in this.lastChar (this.partial of them)
// and are combined with the start of the next buffer.
StringDecoder.prototype.write = function(buf) {
  if (buf.length === 0)
    return '';

  const carried = this.partial;
  if (!carried)
    return this.text(buf, 0, buf.length);

  // Bytes of an incomplete character are pending from the previous call.
  // Top up lastChar with as many bytes from buf as fit (or as are available).
  const room = this.lastChar.length - carried;
  const taken = Math.min(buf.length, room);
  buf.copy(this.lastChar, carried, 0, taken);

  // Try to decode what is now buffered; this updates this.partial.
  const head = this.text(this.lastChar, 0, carried + taken);

  // More bytes left over than were taken from buf: part of the old head is
  // still pending, so nothing in buf beyond the head can be decoded yet.
  if (this.partial > taken)
    return head;

  // The old head was consumed entirely. Resume in buf at the first byte that
  // was not part of a decoded character.
  return head + this.text(buf, taken - this.partial, buf.length);
};
// Decodes only the complete characters in buf[start, end) and returns them as
// a string ('' if there are none). The number of trailing bytes that belong
// to an incomplete character is stored in this.partial, and those bytes are
// placed at the front of this.lastChar so the next write()/flush() finds them.
StringDecoder.prototype.text = function(buf, start, end) {
  if (start === end)
    return '';
  const complete = this.complete(buf, start, end);
  // Decode before touching lastChar: when buf is lastChar itself, the copy
  // below overwrites the very bytes being decoded here.
  const r = (start === complete) ?
    '' : buf.toString(this.encoding, start, complete);
  this.partial = end - complete;
  // Leftover bytes must start at offset 0 of lastChar. That is already the
  // case only when buf is lastChar and nothing was consumed from its front.
  // Otherwise move them, including the in-place case, e.g. UTF-16LE where
  // lastChar holds a lone high surrogate followed by another high surrogate:
  // the first is emitted and the second has to shift down to offset 0.
  // buf.copy() handles overlapping source and target regions.
  if (this.partial && (buf !== this.lastChar || complete !== 0))
    buf.copy(this.lastChar, 0, complete, end);
  return r;
};
// Finishes decoding: decodes the optional final buf, then appends whatever
// this.flush() produces for bytes of an incomplete character that are still
// buffered, and resets the decoder's pending byte count.
StringDecoder.prototype.end = function(buf) {
  let out = '';
  if (buf && buf.length)
    out = this.write(buf);

  if (!this.partial)
    return out;

  out += this.flush();
  this.partial = 0;
  return out;
};
// Given (buf, start, end), determine the maximal n <= end such that
// buf.slice(start, n) contains only complete characters.
// This prototype-level default is the UTF-8 implementation; the constructor
// installs a different function on the instance for utf16le and base64.
StringDecoder.prototype.complete = utf8Complete;
// Returns a string representation of the this.partial bytes in
// this.lastChar which represent an incomplete character.
// This prototype-level default is the UTF-8 implementation; the constructor
// installs simpleFlush on the instance for utf16le and base64.
StringDecoder.prototype.flush = utf8Flush;
// Checks at most the last 3 bytes of buf[start, end) for an incomplete UTF-8
// character and returns the position after the last complete character: the
// index of the leading byte of a trailing incomplete sequence, or end if the
// range does not end in one. Bytes outside [start, end) are never inspected,
// so the result is always within [start, end].
function utf8Complete(buf, start, end) {
  // A UTF-8 sequence is at most 4 bytes long, so an unfinished one has its
  // leading byte among the last 3 bytes. Never look before `start` either.
  const limit = Math.max(start, end - 3);
  for (let i = end - 1; i >= limit; --i) {
    const byte = buf[i];
    if (byte >> 6 === 0x02)
      continue; // continuation byte (10xxxxxx), keep looking for the lead

    let numBytes;
    if (byte >> 5 === 0x06)
      numBytes = 2; // 110xxxxx
    else if (byte >> 4 === 0x0E)
      numBytes = 3; // 1110xxxx
    else if (byte >> 3 === 0x1E)
      numBytes = 4; // 11110xxx
    else
      numBytes = 1; // ASCII or invalid

    // Incomplete: continue the next run at this leading byte. Otherwise the
    // sequence is complete, possibly followed by garbage continuation bytes.
    return (i + numBytes > end) ? i : end;
  }
  // Ends in valid 4-byte sequence or invalid continuation characters.
  // Either way the input is complete, so convert it as is.
  return end;
}
// UTF-8 flush: emits one U+FFFD replacement character for every byte of the
// (partial) character that is still buffered, as counted by this.partial.
function utf8Flush() {
  const pendingBytes = this.partial;
  return '\ufffd'.repeat(pendingBytes);
}
// Returns the position after the last complete UTF-16LE character in
// buf[start, end). A trailing odd byte is never complete. In addition, if the
// last whole 2-byte unit is a leading/high surrogate (its high byte, stored
// second in little-endian order, is in 0xD8..0xDB), that unit is held back as
// well, because the following two bytes are needed to decode the character.
function utf16Complete(buf, start, end) {
  // Drop a dangling odd byte so that only whole code units are considered.
  const evenEnd = end - ((end - start) & 1);
  if (evenEnd <= start)
    return evenEnd;

  const highByte = buf[evenEnd - 1];
  const endsOnHighSurrogate = highByte >= 0xD8 && highByte <= 0xDB;
  return endsOnHighSurrogate ? evenEnd - 2 : evenEnd;
}
// Returns the position after the last whole 3-byte group in buf[start, end);
// the one or two bytes left over past that position are treated as
// incomplete. The buffer contents are not inspected.
function base64Complete(buf, start, end) {
  const trailing = (end - start) % 3;
  return end - trailing;
}
// Flush used for UTF-16LE and Base64: no replacement characters are added
// explicitly for a trailing partial character. The buffered bytes are simply
// converted with Buffer#toString and v8 is left to handle them.
function simpleFlush() {
  const { lastChar, encoding, partial } = this;
  return lastChar.toString(encoding, 0, partial);
}
// write() replacement for single-byte encodings (e.g. ascii, latin1, hex):
// nothing is carried over between calls, so the whole buffer is converted
// directly using this.encoding.
function simpleWrite(buf) {
  const { encoding } = this;
  return buf.toString(encoding);
}
// end() replacement for encodings without carry-over state: decodes the
// optional final buffer via this.write and returns '' when no (or an empty)
// buffer is given. There is nothing to flush.
function simpleEnd(buf) {
  if (!buf || !buf.length)
    return '';
  return this.write(buf);
}