@@ -22,9 +22,10 @@ export const UTF16be_BOM = [0xFE, 0xFF];
2222export const UTF16le_BOM = [ 0xFF , 0xFE ] ; // byte order mark bytes for UTF-16 little endian content
2323export const UTF8_BOM = [ 0xEF , 0xBB , 0xBF ] ; // byte order mark bytes for UTF-8 content
2424
25- const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512 ; // number of bytes to look at to decide about a file being binary or not
26- const NO_GUESS_BUFFER_MAX_LEN = 512 ; // when not auto guessing the encoding, small number of bytes are enough
27- const AUTO_GUESS_BUFFER_MAX_LEN = 512 * 8 ; // with auto guessing we want a lot more content to be read for guessing
25+ const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512 ; // number of bytes to look at when deciding whether a file is binary or not
26+ const NO_ENCODING_GUESS_MIN_BYTES = 512 ; // when not auto guessing the encoding, a small number of bytes is enough
27+ const AUTO_ENCODING_GUESS_MIN_BYTES = 512 * 8 ; // with auto guessing we want a lot more content (4 KiB) to be read for guessing
28+ const AUTO_ENCODING_GUESS_MAX_BYTES = 512 * 128 ; // upper limit (64 KiB) for the number of bytes we pass on to jschardet
2829
2930export interface IDecodeStreamOptions {
3031 guessEncoding : boolean ;
@@ -40,7 +41,7 @@ export interface IDecodeStreamResult {
4041
4142export function toDecodeStream ( readable : Readable , options : IDecodeStreamOptions ) : Promise < IDecodeStreamResult > {
4243 if ( ! options . minBytesRequiredForDetection ) {
43- options . minBytesRequiredForDetection = options . guessEncoding ? AUTO_GUESS_BUFFER_MAX_LEN : NO_GUESS_BUFFER_MAX_LEN ;
44+ options . minBytesRequiredForDetection = options . guessEncoding ? AUTO_ENCODING_GUESS_MIN_BYTES : NO_ENCODING_GUESS_MIN_BYTES ;
4445 }
4546
4647 return new Promise < IDecodeStreamResult > ( ( resolve , reject ) => {
@@ -210,7 +211,7 @@ const IGNORE_ENCODINGS = ['ascii', 'utf-16', 'utf-32'];
210211async function guessEncodingByBuffer ( buffer : Buffer ) : Promise < string | null > {
211212 const jschardet = await import ( 'jschardet' ) ;
212213
213- const guessed = jschardet . detect ( buffer ) ;
214+ const guessed = jschardet . detect ( buffer . slice ( 0 , AUTO_ENCODING_GUESS_MAX_BYTES ) ) ; // ensure to limit buffer for guessing due to https://github.com/aadsm/jschardet/issues/53
214215 if ( ! guessed || ! guessed . encoding ) {
215216 return null ;
216217 }
0 commit comments