Skip to content

Commit 789a757

Browse files
author
Benjamin Pasero
committed
encoding - adjust toDecodeStream()
1 parent 3675f28 commit 789a757

1 file changed

Lines changed: 62 additions & 53 deletions

File tree

src/vs/base/node/encoding.ts

Lines changed: 62 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
import * as iconv from 'iconv-lite';
77
import { Readable, ReadableStream, newWriteableStream } from 'vs/base/common/stream';
8-
import { isUndefinedOrNull, isUndefined, isNumber } from 'vs/base/common/types';
98
import { VSBuffer, VSBufferReadable, VSBufferReadableStream } from 'vs/base/common/buffer';
109

1110
export const UTF8 = 'utf8';
@@ -41,77 +40,87 @@ export interface IDecodeStreamResult {
4140
}
4241

4342
export function toDecodeStream(source: VSBufferReadableStream, options: IDecodeStreamOptions): Promise<IDecodeStreamResult> {
44-
if (!options.minBytesRequiredForDetection) {
45-
options.minBytesRequiredForDetection = options.guessEncoding ? AUTO_ENCODING_GUESS_MIN_BYTES : NO_ENCODING_GUESS_MIN_BYTES;
46-
}
43+
// Use the caller-supplied threshold when present; otherwise pick a default based on whether encoding guessing is enabled. The parentheses are required: `??` binds tighter than `?:`, so without them an explicit threshold would only select between the two defaults instead of being used.
const minBytesRequiredForDetection = options.minBytesRequiredForDetection ?? (options.guessEncoding ? AUTO_ENCODING_GUESS_MIN_BYTES : NO_ENCODING_GUESS_MIN_BYTES);
4744

4845
return new Promise<IDecodeStreamResult>((resolve, reject) => {
4946
const target = newWriteableStream<string>(strings => strings.join(''));
5047

5148
const bufferedChunks: VSBuffer[] = [];
5249
let bytesBuffered = 0;
53-
let decoder: iconv.DecoderStream | null = null;
54-
55-
const startDecodeStream = () => {
56-
return Promise.resolve()
57-
.then(() =>
58-
// detect encoding from buffer
59-
detectEncodingFromBuffer({
60-
buffer: Buffer.from(VSBuffer.concat(bufferedChunks).buffer),
61-
bytesRead: bytesBuffered
62-
}, options.guessEncoding)
63-
)
64-
.then(detected => {
65-
// ensure to respect overwrite of encoding
66-
detected.encoding = options.overwriteEncoding(detected.encoding);
67-
68-
// decode and write buffered content
69-
decoder = iconv.getDecoder(toNodeEncoding(detected.encoding));
70-
const nodeBuffer = Buffer.from(VSBuffer.concat(bufferedChunks).buffer);
71-
target.write(decoder.write(nodeBuffer));
72-
bufferedChunks.length = 0;
73-
74-
// signal to the outside our detected encoding
75-
// and final decoder stream
76-
resolve({
77-
stream: target,
78-
detected,
79-
});
80-
})
81-
.catch(reject);
50+
51+
let decoder: iconv.DecoderStream | undefined = undefined;
52+
53+
const createDecoder = async () => {
54+
try {
55+
56+
// detect encoding from buffer
57+
const detected = await detectEncodingFromBuffer({
58+
buffer: Buffer.from(VSBuffer.concat(bufferedChunks).buffer),
59+
bytesRead: bytesBuffered
60+
}, options.guessEncoding);
61+
62+
// ensure to respect overwrite of encoding
63+
detected.encoding = options.overwriteEncoding(detected.encoding);
64+
65+
// decode and write buffered content
66+
decoder = iconv.getDecoder(toNodeEncoding(detected.encoding));
67+
const nodeBuffer = Buffer.from(VSBuffer.concat(bufferedChunks).buffer);
68+
target.write(decoder.write(nodeBuffer));
69+
bufferedChunks.length = 0;
70+
71+
// signal to the outside our detected encoding and final decoder stream
72+
resolve({
73+
stream: target,
74+
detected
75+
});
76+
} catch (error) {
77+
reject(error);
78+
}
8279
};
8380

84-
source.on('error', target.error);
85-
source.on('data', (chunk) => {
81+
// Stream error: forward to target
82+
source.on('error', error => target.error(error));
83+
84+
// Stream data
85+
source.on('data', async chunk => {
86+
8687
// if the decoder is ready, we just write directly
87-
if (!isUndefinedOrNull(decoder)) {
88+
if (decoder) {
8889
target.write(decoder.write(Buffer.from(chunk.buffer)));
89-
return;
9090
}
9191

9292
// otherwise we need to buffer the data until the stream is ready
93-
bufferedChunks.push(chunk);
94-
bytesBuffered += chunk.byteLength;
93+
else {
94+
bufferedChunks.push(chunk);
95+
bytesBuffered += chunk.byteLength;
96+
97+
// buffered enough data for encoding detection, create stream
98+
if (bytesBuffered >= minBytesRequiredForDetection) {
99+
100+
// pause stream here until the decoder is ready
101+
source.pause();
95102

96-
// buffered enough data for encoding detection, create stream and forward data
97-
if (isNumber(options.minBytesRequiredForDetection) && bytesBuffered >= options.minBytesRequiredForDetection) {
98-
startDecodeStream();
103+
await createDecoder();
104+
105+
// resume stream now that decoder is ready but
106+
// outside of this stack to reduce recursion
107+
setTimeout(() => source.resume());
108+
}
99109
}
100110
});
101-
source.on('end', () => {
102-
// normal finish
103-
if (!isUndefinedOrNull(decoder)) {
104-
target.end(decoder.end());
105-
}
111+
112+
// Stream end
113+
source.on('end', async () => {
106114

107115
// we were still waiting for data to do the encoding
108116
// detection. thus, wrap up starting the stream even
109117
// without all the data to get things going
110-
else {
111-
startDecodeStream().then(() => {
112-
target.end(decoder?.end());
113-
});
118+
if (!decoder) {
119+
await createDecoder();
114120
}
121+
122+
// end the target with the remainders of the decoder
123+
target.end(decoder?.end());
115124
});
116125
});
117126
}
@@ -128,7 +137,7 @@ export function toEncodeReadable(readable: Readable<string>, encoding: string, o
128137
}
129138

130139
const chunk = readable.read();
131-
if (isUndefinedOrNull(chunk)) {
140+
if (typeof chunk !== 'string') {
132141
done = true;
133142

134143
// If we are instructed to add a BOM but we detect that no
@@ -147,7 +156,7 @@ export function toEncodeReadable(readable: Readable<string>, encoding: string, o
147156
}
148157

149158
const leftovers = encoder.end();
150-
if (!isUndefined(leftovers) && leftovers.length > 0) {
159+
if (leftovers && leftovers.length > 0) {
151160
return VSBuffer.wrap(leftovers);
152161
}
153162

0 commit comments

Comments
 (0)