Skip to content

Commit a687feb

Browse files
committed
Add fallback encoding/decoding
1 parent 7a089f7 commit a687feb

4 files changed

Lines changed: 171 additions & 11 deletions

File tree

src/vs/base/common/buffer.ts

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,13 @@
33
* Licensed under the MIT License. See License.txt in the project root for license information.
44
*--------------------------------------------------------------------------------------------*/
55

6+
import * as strings from 'vs/base/common/strings';
7+
68
declare var Buffer: any;
7-
export const hasBuffer = (typeof Buffer !== 'undefined');
9+
10+
const hasBuffer = (typeof Buffer !== 'undefined');
11+
const hasTextEncoder = (typeof TextEncoder !== 'undefined');
12+
const hasTextDecoder = (typeof TextDecoder !== 'undefined');
813

914
let textEncoder: TextEncoder | null;
1015
let textDecoder: TextDecoder | null;
@@ -31,11 +36,13 @@ export class VSBuffer {
3136
static fromString(source: string): VSBuffer {
3237
if (hasBuffer) {
3338
return new VSBuffer(Buffer.from(source));
34-
} else {
39+
} else if (hasTextEncoder) {
3540
if (!textEncoder) {
3641
textEncoder = new TextEncoder();
3742
}
3843
return new VSBuffer(textEncoder.encode(source));
44+
} else {
45+
return new VSBuffer(strings.encodeUTF8(source));
3946
}
4047
}
4148

@@ -69,11 +76,13 @@ export class VSBuffer {
6976
toString(): string {
7077
if (hasBuffer) {
7178
return this.buffer.toString();
72-
} else {
79+
} else if (hasTextDecoder) {
7380
if (!textDecoder) {
7481
textDecoder = new TextDecoder();
7582
}
7683
return textDecoder.decode(this.buffer);
84+
} else {
85+
return strings.decodeUTF8(this.buffer);
7786
}
7887
}
7988

src/vs/base/common/strings.ts

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,119 @@ class MarkClassifier {
672672
}
673673
}
674674

675+
/**
 * A manual encoding of `str` to UTF8.
 * Use only in environments which do not offer native conversion methods!
 *
 * Unpaired surrogate code units cannot be represented in well-formed UTF-8.
 * They are encoded as U+FFFD (REPLACEMENT CHARACTER), which is also what the
 * native `TextEncoder` produces for such input.
 *
 * @param str The string to encode.
 * @returns A freshly allocated array holding the UTF-8 bytes of `str`.
 */
export function encodeUTF8(str: string): Uint8Array {
	const strLen = str.length;

	// See https://en.wikipedia.org/wiki/UTF-8

	// first loop to establish needed buffer size
	let neededSize = 0;
	let strOffset = 0;
	while (strOffset < strLen) {
		const codePoint = getNextCodePoint(str, strLen, strOffset);
		// a supplementary plane code point occupies two UTF-16 code units
		strOffset += (codePoint >= Constants.UNICODE_SUPPLEMENTARY_PLANE_BEGIN ? 2 : 1);

		if (codePoint < 0x0080) {
			neededSize += 1;
		} else if (codePoint < 0x0800) {
			neededSize += 2;
		} else if (codePoint < 0x10000) {
			// also covers unpaired surrogates: their replacement U+FFFD takes 3 bytes as well
			neededSize += 3;
		} else {
			neededSize += 4;
		}
	}

	// second loop to actually encode
	const arr = new Uint8Array(neededSize);
	strOffset = 0;
	let arrOffset = 0;
	while (strOffset < strLen) {
		let codePoint = getNextCodePoint(str, strLen, strOffset);
		strOffset += (codePoint >= Constants.UNICODE_SUPPLEMENTARY_PLANE_BEGIN ? 2 : 1);

		if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
			// A value in the surrogate range can only stem from an unpaired surrogate.
			// Emitting it as-is would produce ill-formed UTF-8, so substitute U+FFFD.
			codePoint = 0xFFFD;
		}

		if (codePoint < 0x0080) {
			// 0xxxxxxx
			arr[arrOffset++] = codePoint;
		} else if (codePoint < 0x0800) {
			// 110xxxxx 10xxxxxx
			arr[arrOffset++] = 0b11000000 | ((codePoint >>> 6) & 0b00011111);
			arr[arrOffset++] = 0b10000000 | (codePoint & 0b00111111);
		} else if (codePoint < 0x10000) {
			// 1110xxxx 10xxxxxx 10xxxxxx
			arr[arrOffset++] = 0b11100000 | ((codePoint >>> 12) & 0b00001111);
			arr[arrOffset++] = 0b10000000 | ((codePoint >>> 6) & 0b00111111);
			arr[arrOffset++] = 0b10000000 | (codePoint & 0b00111111);
		} else {
			// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			arr[arrOffset++] = 0b11110000 | ((codePoint >>> 18) & 0b00000111);
			arr[arrOffset++] = 0b10000000 | ((codePoint >>> 12) & 0b00111111);
			arr[arrOffset++] = 0b10000000 | ((codePoint >>> 6) & 0b00111111);
			arr[arrOffset++] = 0b10000000 | (codePoint & 0b00111111);
		}
	}

	return arr;
}
729+
730+
/**
 * A manual decoding of a UTF8 string.
 * Use only in environments which do not offer native conversion methods!
 *
 * Malformed input never throws. Every ill-formed subsequence (stray
 * continuation byte, invalid lead byte, truncated sequence, overlong form,
 * encoded surrogate, value above U+10FFFF) is decoded as U+FFFD
 * (REPLACEMENT CHARACTER) and decoding resumes at the first byte that was not
 * part of the ill-formed subsequence.
 *
 * @param buffer The UTF-8 bytes to decode.
 * @returns The decoded string.
 */
export function decodeUTF8(buffer: Uint8Array): string {
	// https://en.wikipedia.org/wiki/UTF-8
	// https://encoding.spec.whatwg.org/#utf-8-decoder

	const REPLACEMENT_CHARACTER = String.fromCharCode(0xFFFD);

	const len = buffer.byteLength;
	const result: string[] = [];
	let offset = 0;
	while (offset < len) {
		const lead = buffer[offset++];

		if (lead < 0x80) {
			// 1 byte (ASCII)
			result.push(String.fromCharCode(lead));
			continue;
		}

		// Allowed range for the NEXT continuation byte. Narrowing it for the
		// first continuation byte rejects overlong forms, surrogates and
		// values above U+10FFFF without decoding them first.
		let lower = 0x80;
		let upper = 0xBF;
		let needed: number;
		let codePoint: number;

		if (lead >= 0xC2 && lead <= 0xDF) {
			// 2 bytes (0xC0 and 0xC1 could only start overlong forms)
			needed = 1;
			codePoint = lead & 0b00011111;
		} else if (lead >= 0xE0 && lead <= 0xEF) {
			// 3 bytes
			needed = 2;
			codePoint = lead & 0b00001111;
			if (lead === 0xE0) {
				lower = 0xA0; // below this the sequence would be overlong
			} else if (lead === 0xED) {
				upper = 0x9F; // above this the sequence would encode a surrogate
			}
		} else if (lead >= 0xF0 && lead <= 0xF4) {
			// 4 bytes
			needed = 3;
			codePoint = lead & 0b00000111;
			if (lead === 0xF0) {
				lower = 0x90; // below this the sequence would be overlong
			} else if (lead === 0xF4) {
				upper = 0x8F; // above this the value would exceed U+10FFFF
			}
		} else {
			// stray continuation byte (0x80-0xBF) or a byte that is never valid in UTF-8
			result.push(REPLACEMENT_CHARACTER);
			continue;
		}

		let wellFormed = true;
		for (; needed > 0; needed--) {
			// -1 marks "end of input" and fails the range check below
			const next = (offset < len ? buffer[offset] : -1);
			if (next < lower || next > upper) {
				// do not consume `next`: it may start a valid sequence of its own
				wellFormed = false;
				break;
			}
			offset++;
			codePoint = (codePoint << 6) | (next & 0b00111111);
			lower = 0x80;
			upper = 0xBF;
		}

		if (!wellFormed) {
			result.push(REPLACEMENT_CHARACTER);
		} else if (codePoint < 0x10000) {
			// Basic Multilingual Plane
			result.push(String.fromCharCode(codePoint));
		} else {
			// Supplementary Planes: split into a UTF-16 surrogate pair
			const uPrime = codePoint - 0x10000;
			const w1 = 0xD800 + (uPrime >>> 10);
			const w2 = 0xDC00 + (uPrime & 0b1111111111);
			result.push(String.fromCharCode(w1));
			result.push(String.fromCharCode(w2));
		}
	}

	return result.join('');
}
787+
675788
/**
676789
* Generated using https://github.com/alexandrudima/unicode-utils/blob/master/generate-rtl-test.js
677790
*/

src/vs/base/test/common/strings.test.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,4 +458,38 @@ suite('Strings', () => {
458458
assert.equal(strings.removeAccents('ñice'), 'nice');
459459
assert.equal(strings.removeAccents('ńice'), 'nice');
460460
});
461+
462+
test('encodeUTF8', function () {
	/** Asserts that `str` encodes to exactly the bytes listed in `expected`. */
	function assertEncodeUTF8(str: string, expected: number[]): void {
		const actual = strings.encodeUTF8(str);
		// copy into a plain array so that deepEqual compares element by element
		const actualArr: number[] = [];
		for (let offset = 0; offset < actual.byteLength; offset++) {
			actualArr[offset] = actual[offset];
		}
		assert.deepEqual(actualArr, expected);
	}

	/** Asserts that the bytes in `data` decode to exactly `expected`. */
	function assertDecodeUTF8(data: number[], expected: string): void {
		const actual = strings.decodeUTF8(new Uint8Array(data));
		assert.deepEqual(actual, expected);
	}

	/** Asserts that `str` and `buff` convert into each other in both directions. */
	function assertEncodeDecodeUTF8(str: string, buff: number[]): void {
		assertEncodeUTF8(str, buff);
		assertDecodeUTF8(buff, str);
	}

	// 1 byte
	assertEncodeDecodeUTF8('\u0000', [0]);
	assertEncodeDecodeUTF8('!', [33]);
	assertEncodeDecodeUTF8('\u007F', [127]);

	// 2 bytes
	assertEncodeDecodeUTF8('\u0080', [194, 128]);
	assertEncodeDecodeUTF8('Ɲ', [198, 157]);
	assertEncodeDecodeUTF8('\u07FF', [223, 191]);

	// 3 bytes
	assertEncodeDecodeUTF8('\u0800', [224, 160, 128]);
	assertEncodeDecodeUTF8('ஂ', [224, 174, 130]);
	assertEncodeDecodeUTF8('\uffff', [239, 191, 191]);

	// NOTE: a `\uXXXX` escape takes exactly four hex digits, so '\u10000' is
	// U+1000 followed by the character '0' (hence the trailing byte 48). It is
	// NOT the code point U+10000.
	assertEncodeDecodeUTF8('\u10000', [225, 128, 128, 48]);

	// 4 bytes
	// U+10000, the first supplementary plane code point, written as a surrogate pair
	assertEncodeDecodeUTF8('\uD800\uDC00', [240, 144, 128, 128]);
	assertEncodeDecodeUTF8('🧝', [240, 159, 167, 157]);

});
461495
});

src/vs/workbench/browser/web.main.ts

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -211,17 +211,21 @@ class BrowserMain extends Disposable {
211211
private registerFileSystemProviders(environmentService: IWorkbenchEnvironmentService, fileService: IFileService, remoteAgentService: IRemoteAgentService, logService: BufferLogService, logsPath: URI): void {
212212

213213
// Logger
214-
const indexedDBLogProvider = new IndexedDBLogProvider(logsPath.scheme);
215214
(async () => {
216-
try {
217-
await indexedDBLogProvider.database;
215+
if (browser.isEdge) {
216+
fileService.registerProvider(logsPath.scheme, new InMemoryLogProvider(logsPath.scheme));
217+
} else {
218+
try {
219+
const indexedDBLogProvider = new IndexedDBLogProvider(logsPath.scheme);
220+
await indexedDBLogProvider.database;
218221

219-
fileService.registerProvider(logsPath.scheme, indexedDBLogProvider);
220-
} catch (error) {
221-
logService.info('Error while creating indexedDB log provider. Falling back to in-memory log provider.');
222-
logService.error(error);
222+
fileService.registerProvider(logsPath.scheme, indexedDBLogProvider);
223+
} catch (error) {
224+
logService.info('Error while creating indexedDB log provider. Falling back to in-memory log provider.');
225+
logService.error(error);
223226

224-
fileService.registerProvider(logsPath.scheme, new InMemoryLogProvider(logsPath.scheme));
227+
fileService.registerProvider(logsPath.scheme, new InMemoryLogProvider(logsPath.scheme));
228+
}
225229
}
226230

227231
const consoleLogService = new ConsoleLogService(logService.getLevel());

0 commit comments

Comments
 (0)