|
4 | 4 | *--------------------------------------------------------------------------------------------*/ |
5 | 5 |
|
6 | 6 | import * as iconv from 'iconv-lite'; |
7 | | -import { Readable, Writable } from 'stream'; |
8 | | -import { VSBuffer } from 'vs/base/common/buffer'; |
| 7 | +import { Readable, ReadableStream, newWriteableStream } from 'vs/base/common/stream'; |
| 8 | +import { isUndefinedOrNull, isUndefined, isNumber } from 'vs/base/common/types'; |
| 9 | +import { VSBuffer, VSBufferReadable, VSBufferReadableStream } from 'vs/base/common/buffer'; |
9 | 10 |
|
10 | 11 | export const UTF8 = 'utf8'; |
11 | 12 | export const UTF8_with_bom = 'utf8bom'; |
@@ -35,121 +36,135 @@ export interface IDecodeStreamOptions { |
35 | 36 | } |
36 | 37 |
|
/**
 * Outcome of `toDecodeStream()`: the decoded text together with the
 * encoding that was used to produce it.
 */
export interface IDecodeStreamResult {

	/** Stream of decoded string chunks. */
	stream: ReadableStream<string>;

	/** Result of the encoding detection that was run on the buffered bytes. */
	detected: IDetectedEncodingResult;
}
41 | 42 |
|
/**
 * Decodes a stream of bytes into a stream of strings.
 *
 * Incoming chunks are buffered until at least `minBytesRequiredForDetection`
 * bytes are available (or the source ends). The encoding is then detected from
 * the buffered bytes, passed through `options.overwriteEncoding`, and an iconv
 * decoder is created for it. All buffered and subsequent chunks are run through
 * that single decoder and written to the returned stream.
 *
 * @param source stream of `VSBuffer` chunks to decode.
 * @param options detection options; this object is not modified.
 * @returns a promise that resolves with the decoded stream and the detected
 * encoding once detection has finished. It rejects if detection fails or if
 * `source` reports an error before detection has finished.
 */
export function toDecodeStream(source: VSBufferReadableStream, options: IDecodeStreamOptions): Promise<IDecodeStreamResult> {

	// Compute the threshold locally rather than writing it back into the
	// options object owned by the caller.
	const minBytesRequiredForDetection = options.minBytesRequiredForDetection ||
		(options.guessEncoding ? AUTO_ENCODING_GUESS_MIN_BYTES : NO_ENCODING_GUESS_MIN_BYTES);

	return new Promise<IDecodeStreamResult>((resolve, reject) => {
		const target = newWriteableStream<string>(strings => strings.join(''));

		const bufferedChunks: VSBuffer[] = [];
		let bytesBuffered = 0;
		let decoder: iconv.DecoderStream | null = null;

		// Memoized so that detection runs at most once. Chunks (and the end
		// event) that arrive while detection is still pending must not create
		// a second decoder: that would drop any partial multi-byte sequence
		// held by the first one and could decode the rest of the stream with
		// a different encoding than the one reported to the caller.
		let decoderReady: Promise<void> | undefined;

		const startDecodeStream = (): Promise<void> => {
			if (!decoderReady) {
				decoderReady = Promise.resolve()
					.then(() =>
						// detect encoding from buffer
						detectEncodingFromBuffer({
							buffer: Buffer.from(VSBuffer.concat(bufferedChunks).buffer),
							bytesRead: bytesBuffered
						}, options.guessEncoding)
					)
					.then(detected => {
						// ensure to respect overwrite of encoding
						detected.encoding = options.overwriteEncoding(detected.encoding);

						// decode and write buffered content. The chunks are concatenated
						// again here because more data may have been buffered while the
						// detection was running.
						decoder = iconv.getDecoder(toNodeEncoding(detected.encoding));
						const nodeBuffer = Buffer.from(VSBuffer.concat(bufferedChunks).buffer);
						target.write(decoder.write(nodeBuffer));
						bufferedChunks.length = 0;

						// signal to the outside our detected encoding
						// and final decoder stream
						resolve({
							stream: target,
							detected,
						});
					})
					.catch(reject);
			}

			return decoderReady;
		};

		source.on('error', error => {
			// call through a closure so that `error` is invoked as a method of `target`
			target.error(error);

			// if the result was not handed out yet, nobody is listening on `target`,
			// so the failure has to surface through the promise (no-op once resolved)
			reject(error);
		});

		source.on('data', chunk => {
			// if the decoder is ready, we just write directly
			if (!isUndefinedOrNull(decoder)) {
				target.write(decoder.write(Buffer.from(chunk.buffer)));
				return;
			}

			// otherwise we need to buffer the data until the stream is ready
			bufferedChunks.push(chunk);
			bytesBuffered += chunk.byteLength;

			// buffered enough data for encoding detection, create stream and forward data
			if (bytesBuffered >= minBytesRequiredForDetection) {
				startDecodeStream();
			}
		});

		source.on('end', () => {
			// normal finish
			if (!isUndefinedOrNull(decoder)) {
				target.end(decoder.end());
			}

			// we were still waiting for data to do the encoding
			// detection (or the detection is still running). thus,
			// wrap up starting the stream even without all the
			// data to get things going
			else {
				startDecodeStream().then(() => {
					target.end(decoder?.end());
				});
			}
		});
	});
}
| 118 | + |
/**
 * Wraps a readable of strings into a readable of bytes encoded with `encoding`.
 *
 * @param readable source of string chunks; `read()` returning `null` or
 * `undefined` is treated as the end of the input.
 * @param encoding encoding name; it is mapped through `toNodeEncoding()` before
 * being passed to iconv.
 * @param options when `addBOM` is set, a BOM is requested from the iconv
 * encoder. If the source ends without ever yielding a chunk, the BOM for
 * UTF-8 / UTF-16 BE / UTF-16 LE is returned by this function instead.
 * @returns a readable whose `read()` yields encoded `VSBuffer` chunks and then
 * `null` once the input is exhausted.
 */
export function toEncodeReadable(readable: Readable<string>, encoding: string, options?: { addBOM?: boolean }): VSBufferReadable {
	const encoder = iconv.getEncoder(toNodeEncoding(encoding), options);

	// Tracks whether anything was handed to the encoder at all. This is
	// deliberately not a character count: an empty string chunk still goes
	// through `encoder.write()`, which is where the encoder gets its chance
	// to emit the BOM, so the manual BOM below must be skipped in that case
	// to avoid writing it twice.
	let chunkEncoded = false;
	let done = false;

	return {
		read() {
			if (done) {
				return null;
			}

			const chunk = readable.read();
			if (!isUndefinedOrNull(chunk)) {
				chunkEncoded = true;

				return VSBuffer.wrap(encoder.write(chunk));
			}

			// the source is exhausted
			done = true;

			// If we are instructed to add a BOM but nothing was ever passed
			// to the encoder, we must ensure to return the BOM ourselves so
			// that we comply with the contract.
			if (!chunkEncoded && options?.addBOM) {
				switch (encoding) {
					case UTF8:
					case UTF8_with_bom:
						return VSBuffer.wrap(Buffer.from(UTF8_BOM));
					case UTF16be:
						return VSBuffer.wrap(Buffer.from(UTF16be_BOM));
					case UTF16le:
						return VSBuffer.wrap(Buffer.from(UTF16le_BOM));
				}
			}

			// flush whatever the encoder is still holding on to
			const leftovers = encoder.end();
			if (!isUndefined(leftovers) && leftovers.length > 0) {
				return VSBuffer.wrap(leftovers);
			}

			return null;
		}
	};
}
140 | 163 |
|
/**
 * Tells whether iconv knows the given encoding.
 *
 * @param encoding encoding name; it is mapped through `toNodeEncoding()` first.
 * @returns the answer of `iconv.encodingExists()` for the mapped name.
 */
export function encodingExists(encoding: string): boolean {
	const nodeEncoding = toNodeEncoding(encoding);

	return iconv.encodingExists(nodeEncoding);
}
144 | 167 |
|
145 | | -function decodeStream(encoding: string | null): NodeJS.ReadWriteStream { |
146 | | - return iconv.decodeStream(toNodeEncoding(encoding)); |
147 | | -} |
148 | | - |
149 | | -export function encodeStream(encoding: string, options?: { addBOM?: boolean }): NodeJS.ReadWriteStream { |
150 | | - return iconv.encodeStream(toNodeEncoding(encoding), options); |
151 | | -} |
152 | | - |
153 | 168 | export function toNodeEncoding(enc: string | null): string { |
154 | 169 | if (enc === UTF8_with_bom || enc === null) { |
155 | 170 | return UTF8; // iconv does not distinguish UTF 8 with or without BOM, so we need to help it |
|
0 commit comments