Skip to content

Commit 1b8a37d

Browse files
committed
Add a fast TypeScript classifier
1 parent 8154b72 commit 1b8a37d

8 files changed

Lines changed: 601 additions & 13 deletions

File tree

build/gulpfile.hygiene.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,8 @@ const copyrightFilter = [
119119
'!resources/completions/**',
120120
'!extensions/markdown-language-features/media/highlight.css',
121121
'!extensions/html-language-features/server/src/modes/typescript/*',
122-
'!extensions/*/server/bin/*'
122+
'!extensions/*/server/bin/*',
123+
'!src/vs/editor/test/node/classification/typescript-test.ts',
123124
];
124125

125126
const eslintFilter = [

extensions/typescript-basics/package.json

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,7 @@
5353
{
5454
"language": "typescript",
5555
"scopeName": "source.ts",
56-
"path": "./syntaxes/TypeScript.tmLanguage.json",
57-
"tokenTypes": {
58-
"entity.name.type.instance.jsdoc": "other",
59-
"entity.name.function.tagged-template": "other",
60-
"meta.import string.quoted": "other",
61-
"variable.other.jsdoc": "other"
62-
}
56+
"path": "./syntaxes/TypeScript.tmLanguage.json"
6357
},
6458
{
6559
"language": "typescriptreact",

src/vs/editor/common/model/textModelTokens.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@ export class TokenizationStateStore {
117117
if (deleteCount === 0) {
118118
return;
119119
}
120+
if (start + deleteCount > this._len) {
121+
deleteCount = this._len - start;
122+
}
120123
this._beginState.splice(start, deleteCount);
121124
this._valid.splice(start, deleteCount);
122125
this._len -= deleteCount;

src/vs/editor/common/model/tokensStore.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,9 @@ export class TokensStore {
116116
if (deleteCount === 0) {
117117
return;
118118
}
119+
if (start + deleteCount > this._len) {
120+
deleteCount = this._len - start;
121+
}
119122
this._lineTokens.splice(start, deleteCount);
120123
this._len -= deleteCount;
121124
}
Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
/*---------------------------------------------------------------------------------------------
2+
* Copyright (c) Microsoft Corporation. All rights reserved.
3+
* Licensed under the MIT License. See License.txt in the project root for license information.
4+
*--------------------------------------------------------------------------------------------*/
5+
6+
import { StandardTokenType } from 'vs/editor/common/modes';
7+
import { CharCode } from 'vs/base/common/charCode';
8+
9+
/**
 * Mutable scanning state shared by the parse functions: the input text, a
 * read cursor (`pos`) and the tokens emitted so far.
 *
 * `tokens` is a flat array of triples: start offset, length, token type.
 */
class ParserContext {
	public readonly text: string;
	public readonly len: number;
	public readonly tokens: number[];
	public pos: number;

	private currentTokenStartOffset: number;
	private currentTokenType: StandardTokenType;

	constructor(text: string) {
		this.text = text;
		this.len = this.text.length;
		this.tokens = [];
		this.pos = 0;
		this.currentTokenStartOffset = 0;
		this.currentTokenType = StandardTokenType.Other;
	}

	/**
	 * Returns the char code at `index`, or `CharCode.Null` when `index` lies
	 * outside the text (before the start or at/after the end).
	 */
	private _safeCharCodeAt(index: number): number {
		if (index < 0 || index >= this.len) {
			return CharCode.Null;
		}
		return this.text.charCodeAt(index);
	}

	/**
	 * Reads the char code `distance` characters away from the cursor without
	 * moving the cursor. Yields `CharCode.Null` outside the text.
	 */
	peek(distance: number = 0): number {
		return this._safeCharCodeAt(this.pos + distance);
	}

	/**
	 * Reads the char code under the cursor and moves the cursor one character
	 * forward. Yields `CharCode.Null` at the end of the text.
	 */
	next(): number {
		const result = this._safeCharCodeAt(this.pos);
		this.advance(1);
		return result;
	}

	/**
	 * Moves the cursor by `distance` characters. The cursor is kept inside
	 * `[0, len]` so that a skip at the very end of the input (e.g. the
	 * character after a trailing backslash) cannot make a token longer than
	 * the text.
	 */
	advance(distance: number): void {
		this.pos = Math.max(0, Math.min(this.len, this.pos + distance));
	}

	/** Whether the cursor has reached the end of the text. */
	eof(): boolean {
		return this.pos >= this.len;
	}

	/**
	 * Marks the start of a token of type `tokenType` at the cursor, shifted by
	 * `deltaPos` characters.
	 */
	beginToken(tokenType: StandardTokenType, deltaPos: number = 0): void {
		this.currentTokenStartOffset = this.pos + deltaPos;
		this.currentTokenType = tokenType;
	}

	/**
	 * Ends the token started by `beginToken` at the cursor, shifted by
	 * `deltaPos` characters, and records it. A token that starts exactly where
	 * the previous one ended and has the same type is merged into it.
	 */
	endToken(deltaPos: number = 0): void {
		const length = this.pos + deltaPos - this.currentTokenStartOffset;
		// check if it is touching previous token
		if (this.tokens.length > 0) {
			const previousStartOffset = this.tokens[this.tokens.length - 3];
			const previousLength = this.tokens[this.tokens.length - 2];
			const previousTokenType = this.tokens[this.tokens.length - 1];
			const previousEndOffset = previousStartOffset + previousLength;
			if (this.currentTokenStartOffset === previousEndOffset && previousTokenType === this.currentTokenType) {
				// extend previous token
				this.tokens[this.tokens.length - 2] += length;
				return;
			}
		}
		this.tokens.push(this.currentTokenStartOffset, length, this.currentTokenType);
	}
}
74+
75+
/**
 * Classifies `text` and returns the resulting tokens as a flat array of
 * triples: start offset, length, token type.
 */
export function parse(text: string): number[] {
	const context = new ParserContext(text);
	// parseRoot returns early when it meets an unmatched `}`, so keep
	// restarting it until the whole input has been consumed.
	for (; !context.eof();) {
		parseRoot(context);
	}
	return context.tokens;
}
82+
83+
/**
 * Scans code (as opposed to string or comment content) starting at the cursor.
 * Strings, template strings and anything starting with `/` are delegated to
 * the dedicated parse functions; every other character is skipped.
 *
 * Curly braces are counted; the function returns right after consuming a `}`
 * that has no matching `{` within this call, or when the input ends.
 */
function parseRoot(ctx: ParserContext): void {
	let openBraces = 0;

	while (!ctx.eof()) {
		const code = ctx.peek();

		if (code === CharCode.SingleQuote) {
			parseSimpleString(ctx, CharCode.SingleQuote);
		} else if (code === CharCode.DoubleQuote) {
			parseSimpleString(ctx, CharCode.DoubleQuote);
		} else if (code === CharCode.BackTick) {
			parseInterpolatedString(ctx);
		} else if (code === CharCode.Slash) {
			parseSlash(ctx);
		} else {
			// plain character: consume it, then keep the brace balance up to date
			ctx.advance(1);
			if (code === CharCode.OpenCurlyBrace) {
				openBraces++;
			} else if (code === CharCode.CloseCurlyBrace) {
				openBraces--;
				if (openBraces < 0) {
					// unmatched closing brace: hand control back to the caller
					return;
				}
			}
		}
	}
}
118+
119+
/**
 * Consumes a quoted string whose opening quote is under the cursor and records
 * it as a single String token. The token ends after the first unescaped
 * `closingQuote`, or at the end of the input when there is none.
 */
function parseSimpleString(ctx: ParserContext, closingQuote: number): void {
	ctx.beginToken(StandardTokenType.String);

	// step over the opening quote
	ctx.advance(1);

	while (!ctx.eof()) {
		const code = ctx.next();

		if (code === CharCode.Backslash) {
			// an escape swallows the following character, or both characters of a \r\n pair
			const escapesCrLf = ctx.peek() === CharCode.CarriageReturn && ctx.peek(1) === CharCode.LineFeed;
			ctx.advance(escapesCrLf ? 2 : 1);
			continue;
		}

		if (code === closingQuote) {
			// closing quote consumed, the string is complete
			break;
		}
	}

	ctx.endToken();
}
139+
140+
/**
 * Consumes a backtick-delimited template string whose opening backtick is
 * under the cursor. The literal parts are recorded as String tokens; the
 * content of each `${` substitution is scanned by `parseRoot`.
 */
function parseInterpolatedString(ctx: ParserContext): void {
	ctx.beginToken(StandardTokenType.String);

	// step over the opening backtick
	ctx.advance(1);

	while (!ctx.eof()) {
		const code = ctx.next();

		if (code === CharCode.Backslash) {
			// an escape swallows the following character, or both characters of a \r\n pair
			const escapesCrLf = ctx.peek() === CharCode.CarriageReturn && ctx.peek(1) === CharCode.LineFeed;
			ctx.advance(escapesCrLf ? 2 : 1);
			continue;
		}

		if (code === CharCode.BackTick) {
			// closing backtick consumed, the string is complete
			break;
		}

		if (code !== CharCode.DollarSign || ctx.peek() !== CharCode.OpenCurlyBrace) {
			// ordinary string content
			continue;
		}

		// `${`: close the string token after the opening brace, scan the
		// substitution as code, then resume the string token one character
		// before the cursor (the position of the last character parseRoot consumed).
		ctx.advance(1);
		ctx.endToken();
		parseRoot(ctx);
		ctx.beginToken(StandardTokenType.String, -1);
	}

	ctx.endToken();
}
167+
168+
/**
 * Handles a `/` under the cursor: it starts a multi-line comment, a
 * single-line comment or a regular expression literal; otherwise it is
 * skipped as a plain character.
 */
function parseSlash(ctx: ParserContext): void {
	switch (ctx.peek(1)) {
		case CharCode.Asterisk:
			parseMultiLineComment(ctx);
			return;
		case CharCode.Slash:
			parseSingleLineComment(ctx);
			return;
	}

	if (!tryParseRegex(ctx)) {
		// neither a comment nor a regex: just step over the slash
		ctx.advance(1);
	}
}
187+
188+
// Matches when a piece of text ends with a letter or digit, optionally followed by whitespace.
const REGEX_PRECEDED_BY_IDENTIFIER = /[a-zA-Z0-9](\s*)$/;
// Matches when a piece of text starts (after optional whitespace) with `.`, `;`, `/`, `,`, `)`, `]`, `}` or is empty.
const REGEX_FOLLOWED_BY_OPERATOR = /^(\s*)(\.|;|\/|,|\)|\]|\}|$)/;

/**
 * Tries to read a regular expression literal whose opening `/` is under the
 * cursor.
 *
 * On success the literal (including its flags) is recorded as a RegEx token,
 * the cursor is moved past it and `true` is returned. Otherwise nothing is
 * consumed and `false` is returned.
 *
 * The `/` is rejected as a regex start when:
 *  - the (up to) 100 characters before it end with a letter or digit,
 *  - a line terminator appears before the closing `/`, also right after a backslash,
 *  - the closing `/` is directly followed by another `/`,
 *  - the literal is not followed by `.`, `;`, `/`, `,`, `)`, `]`, `}` or the end of input.
 */
function tryParseRegex(ctx: ParserContext): boolean {
	// See https://www.ecma-international.org/ecma-262/10.0/index.html#prod-RegularExpressionLiteral

	// TODO: avoid regex...
	// Clamp the window start at 0: a negative start would make the window
	// wrap around and pick up text that comes *after* the slash.
	const contentBefore = ctx.text.substring(Math.max(0, ctx.pos - 100), ctx.pos);
	if (REGEX_PRECEDED_BY_IDENTIFIER.test(contentBefore)) {
		// Cannot start after an identifier
		return false;
	}

	const len = ctx.len - ctx.pos;
	let inClass = false;

	// `pos` is relative to the cursor; start at 1 to skip the opening /
	let pos = 1;

	while (pos < len) {
		const ch = ctx.peek(pos++);

		if (ch === CharCode.CarriageReturn || ch === CharCode.LineFeed) {
			return false;
		}

		if (ch === CharCode.Backslash) {
			// look at the escaped character (the one right after the backslash)
			const nextCh = ctx.peek(pos);
			if (nextCh === CharCode.CarriageReturn || nextCh === CharCode.LineFeed) {
				return false;
			}
			// skip next character
			pos++;
			continue;
		}

		if (inClass) {
			// inside [...] a slash does not terminate the literal
			if (ch === CharCode.CloseSquareBracket) {
				inClass = false;
			}
			continue;
		}

		if (ch === CharCode.OpenSquareBracket) {
			inClass = true;
			continue;
		}

		if (ch !== CharCode.Slash) {
			continue;
		}

		// Found the closing slash.

		// cannot be directly followed by a /
		if (ctx.peek(pos) === CharCode.Slash) {
			return false;
		}

		// consume flags
		let flagCh = ctx.peek(pos);
		while (flagCh >= CharCode.a && flagCh <= CharCode.z) {
			pos++;
			flagCh = ctx.peek(pos);
		}

		// TODO: avoid regex...
		if (!REGEX_FOLLOWED_BY_OPERATOR.test(ctx.text.substring(ctx.pos + pos))) {
			return false;
		}

		// Must be followed by an operator of kinds
		ctx.beginToken(StandardTokenType.RegEx);
		ctx.advance(pos);
		ctx.endToken();
		return true;
	}

	return false;
}
270+
271+
/**
 * Consumes a multi-line comment whose opening slash-asterisk pair starts under
 * the cursor and records it as a single Comment token. The token ends after
 * the closing asterisk-slash pair, or at the end of the input when the comment
 * is not closed.
 */
function parseMultiLineComment(ctx: ParserContext): void {
	ctx.beginToken(StandardTokenType.Comment);

	// step over the two opening characters
	ctx.advance(2);

	while (!ctx.eof()) {
		const closes = ctx.next() === CharCode.Asterisk && ctx.peek() === CharCode.Slash;
		if (closes) {
			// include the closing slash in the token
			ctx.advance(1);
			break;
		}
	}

	ctx.endToken();
}
289+
290+
/**
 * Consumes a single-line comment whose `//` starts under the cursor and
 * records it as a single Comment token. The token runs up to and including
 * the first carriage return or line feed, or to the end of the input.
 */
function parseSingleLineComment(ctx: ParserContext): void {
	ctx.beginToken(StandardTokenType.Comment);

	// step over the two slashes
	ctx.advance(2);

	let lineEnded = false;
	while (!lineEnded && !ctx.eof()) {
		const code = ctx.next();
		lineEnded = (code === CharCode.CarriageReturn || code === CharCode.LineFeed);
	}

	ctx.endToken();
}

0 commit comments

Comments
 (0)