Skip to content

Commit 626049a

Browse files
author
Benjamin Pasero
committed
Large file under git control encoding guess can freeze the editor (fix microsoft#87205)
1 parent ee8eb46 commit 626049a

1 file changed

Lines changed: 6 additions & 5 deletions

File tree

src/vs/base/node/encoding.ts

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -22,9 +22,10 @@ export const UTF16be_BOM = [0xFE, 0xFF];
2222
export const UTF16le_BOM = [0xFF, 0xFE];
2323
export const UTF8_BOM = [0xEF, 0xBB, 0xBF];
2424

25-
const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512; // number of bytes to look at to decide about a file being binary or not
26-
const NO_GUESS_BUFFER_MAX_LEN = 512; // when not auto guessing the encoding, small number of bytes are enough
27-
const AUTO_GUESS_BUFFER_MAX_LEN = 512 * 8; // with auto guessing we want a lot more content to be read for guessing
25+
const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512; // number of bytes to look at to decide about a file being binary or not
26+
const NO_ENCODING_GUESS_MIN_BYTES = 512; // when not auto guessing the encoding, small number of bytes are enough
27+
const AUTO_ENCODING_GUESS_MIN_BYTES = 512 * 8; // with auto guessing we want a lot more content to be read for guessing
28+
const AUTO_ENCODING_GUESS_MAX_BYTES = 512 * 128; // set an upper limit for the number of bytes we pass on to jschardet
2829

2930
export interface IDecodeStreamOptions {
3031
guessEncoding: boolean;
@@ -40,7 +41,7 @@ export interface IDecodeStreamResult {
4041

4142
export function toDecodeStream(readable: Readable, options: IDecodeStreamOptions): Promise<IDecodeStreamResult> {
4243
if (!options.minBytesRequiredForDetection) {
43-
options.minBytesRequiredForDetection = options.guessEncoding ? AUTO_GUESS_BUFFER_MAX_LEN : NO_GUESS_BUFFER_MAX_LEN;
44+
options.minBytesRequiredForDetection = options.guessEncoding ? AUTO_ENCODING_GUESS_MIN_BYTES : NO_ENCODING_GUESS_MIN_BYTES;
4445
}
4546

4647
return new Promise<IDecodeStreamResult>((resolve, reject) => {
@@ -210,7 +211,7 @@ const IGNORE_ENCODINGS = ['ascii', 'utf-16', 'utf-32'];
210211
async function guessEncodingByBuffer(buffer: Buffer): Promise<string | null> {
211212
const jschardet = await import('jschardet');
212213

213-
const guessed = jschardet.detect(buffer);
214+
const guessed = jschardet.detect(buffer.slice(0, AUTO_ENCODING_GUESS_MAX_BYTES)); // ensure to limit buffer for guessing due to https://github.com/aadsm/jschardet/issues/53
214215
if (!guessed || !guessed.encoding) {
215216
return null;
216217
}

0 commit comments

Comments
 (0)