Skip to content

Commit 3cc0817

Browse files
committed
Record language for each seen scope (microsoft#14136)
1 parent 7a90911 commit 3cc0817

2 files changed

Lines changed: 81 additions & 47 deletions

File tree

src/vs/editor/node/textMate/TMSyntax.ts

Lines changed: 80 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,9 @@ export class TMScopeRegistry {
7272
public register(language: string, scopeName: string, filePath: string): void {
7373
this._scopeNameToFilePath[scopeName] = filePath;
7474
if (language) {
75-
this._scopeNameToLanguage[scopeName] = language;
75+
this._scopeNameToLanguage[scopeName] = language;
7676
this._cachedScopesRegex = null;
77-
}
77+
}
7878
}
7979

8080
public getFilePath(scopeName: string): string {
@@ -199,71 +199,104 @@ export class MainProcessTextMateSyntax {
199199
return;
200200
}
201201

202-
TokenizationRegistry.register(modeId, createTokenizationSupport(modeId, grammar));
202+
TokenizationRegistry.register(modeId, createTokenizationSupport(this._scopeRegistry, modeId, grammar));
203203
});
204204
}
205-
206-
/**
207-
* Given a produced TM scope, return the language that token describes or null if unknown.
208-
* e.g. source.html => html, source.css.embedded.html => css, punctuation.definition.tag.html => null
209-
*/
210-
public scopeToLanguage(scope: string): string {
211-
return this._scopeRegistry.scopeToLanguage(scope);
212-
}
213205
}
214206

215-
function createTokenizationSupport(modeId: string, grammar: IGrammar): ITokenizationSupport {
216-
var tokenizer = new Tokenizer(modeId, grammar);
207+
function createTokenizationSupport(scopeRegistry: TMScopeRegistry, modeId: string, grammar: IGrammar): ITokenizationSupport {
208+
var tokenizer = new Tokenizer(scopeRegistry, modeId, grammar);
217209
return {
218210
getInitialState: () => new TMState(modeId, null, null),
219211
tokenize: (line, state, offsetDelta?, stopAtOffset?) => tokenizer.tokenize(line, <TMState>state, offsetDelta, stopAtOffset)
220212
};
221213
}
222214

215+
/**
216+
* Data associated with a TextMate scope as part of decoding.
217+
*
218+
* e.g.
219+
* For a scope "punctuation.definition.string.end.html", the tokens are: punctuation, definition, string, end, html.
220+
* Each of those tokens receives a unique numeric id, so instead of storing the token strings, we store the token ids.
221+
* Ultimately this means we store something like [23, 21, 12, 13, 1], considering those numbers to be the ids of the tokens.
222+
*/
223+
export class TMScopeDecodeData {
224+
_tmScopeDecodeDataBrand: void;
225+
226+
/**
227+
* The original text mate scope.
228+
*/
229+
public readonly scope: string;
230+
231+
/**
232+
* The language this scope belongs to.
233+
* e.g. source.html => html, source.css.embedded.html => css, punctuation.definition.tag.html => null
234+
*/
235+
public readonly language: string;
236+
237+
/**
238+
* The token ids this scope consists of.
239+
*/
240+
public readonly tokenIds: number[];
241+
242+
constructor(scope: string, language: string, tokenIds: number[]) {
243+
this.scope = scope;
244+
this.language = language;
245+
this.tokenIds = tokenIds;
246+
}
247+
}
248+
223249
export class DecodeMap {
224250
_decodeMapBrand: void;
225251

226-
lastAssignedId: number;
227-
scopeToTokenIds: { [scope: string]: number[]; };
228-
tokenToTokenId: { [token: string]: number; };
229-
tokenIdToToken: string[];
252+
private lastAssignedTokenId: number;
253+
private scopeRegistry: TMScopeRegistry;
254+
private readonly scopeToTokenIds: { [scope: string]: TMScopeDecodeData; };
255+
private readonly tokenToTokenId: { [token: string]: number; };
256+
private readonly tokenIdToToken: string[];
230257
prevToken: TMTokenDecodeData;
231258

232-
constructor() {
233-
this.lastAssignedId = 0;
259+
constructor(scopeRegistry: TMScopeRegistry) {
260+
this.lastAssignedTokenId = 0;
261+
this.scopeRegistry = scopeRegistry;
234262
this.scopeToTokenIds = Object.create(null);
235263
this.tokenToTokenId = Object.create(null);
236264
this.tokenIdToToken = [null];
237265
this.prevToken = new TMTokenDecodeData([], []);
238266
}
239267

240-
public getTokenIds(scope: string): number[] {
241-
let tokens = this.scopeToTokenIds[scope];
242-
if (tokens) {
243-
return tokens;
268+
private _getTokenId(token: string): number {
269+
let tokenId = this.tokenToTokenId[token];
270+
if (!tokenId) {
271+
tokenId = (++this.lastAssignedTokenId);
272+
this.tokenToTokenId[token] = tokenId;
273+
this.tokenIdToToken[tokenId] = token;
244274
}
245-
let tmpTokens = scope.split('.');
246-
247-
tokens = [];
248-
for (let i = 0; i < tmpTokens.length; i++) {
249-
let token = tmpTokens[i];
250-
let tokenId = this.tokenToTokenId[token];
251-
if (!tokenId) {
252-
tokenId = (++this.lastAssignedId);
253-
this.tokenToTokenId[token] = tokenId;
254-
this.tokenIdToToken[tokenId] = token;
255-
}
256-
tokens.push(tokenId);
275+
return tokenId;
276+
}
277+
278+
public decodeTMScope(scope: string): TMScopeDecodeData {
279+
let result = this.scopeToTokenIds[scope];
280+
if (result) {
281+
return result;
257282
}
258283

259-
this.scopeToTokenIds[scope] = tokens;
260-
return tokens;
284+
let scopePieces = scope.split('.');
285+
286+
let tokenIds: number[] = [];
287+
for (let i = 0; i < scopePieces.length; i++) {
288+
tokenIds[i] = this._getTokenId(scopePieces[i]);
289+
}
290+
291+
result = new TMScopeDecodeData(scope, this.scopeRegistry.scopeToLanguage(scope), tokenIds);
292+
this.scopeToTokenIds[scope] = result;
293+
return result;
261294
}
262295

263296
public getToken(tokenMap: boolean[]): string {
264297
let result = '';
265298
let isFirst = true;
266-
for (let i = 1; i <= this.lastAssignedId; i++) {
299+
for (let i = 1; i <= this.lastAssignedTokenId; i++) {
267300
if (tokenMap[i]) {
268301
if (isFirst) {
269302
isFirst = false;
@@ -281,8 +314,8 @@ export class DecodeMap {
281314
export class TMTokenDecodeData {
282315
_tmTokenDecodeDataBrand: void;
283316

284-
public scopes: string[];
285-
public scopeTokensMaps: boolean[][];
317+
public readonly scopes: string[];
318+
public readonly scopeTokensMaps: boolean[][];
286319

287320
constructor(scopes: string[], scopeTokensMaps: boolean[][]) {
288321
this.scopes = scopes;
@@ -304,10 +337,10 @@ class Tokenizer {
304337
private _modeId: string;
305338
private _decodeMap: DecodeMap;
306339

307-
constructor(modeId: string, grammar: IGrammar) {
340+
constructor(scopeRegistry: TMScopeRegistry, modeId: string, grammar: IGrammar) {
308341
this._modeId = modeId;
309342
this._grammar = grammar;
310-
this._decodeMap = new DecodeMap();
343+
this._decodeMap = new DecodeMap(scopeRegistry);
311344
}
312345

313346
public tokenize(line: string, state: TMState, offsetDelta: number = 0, stopAtOffset?: number): ILineTokens {
@@ -358,7 +391,7 @@ export function decodeTextMateToken(decodeMap: DecodeMap, scopes: string[]): str
358391
let scopeTokensMaps: boolean[][] = [];
359392
let prevScopeTokensMaps: boolean[] = [];
360393
let sameAsPrev = true;
361-
for (let level = 1/* deliberately skip scope 0*/; level < scopes.length; level++) {
394+
for (let level = 1/* deliberately skip scope 0*/, scopesLength = scopes.length; level < scopesLength; level++) {
362395
let scope = scopes[level];
363396

364397
if (sameAsPrev) {
@@ -370,10 +403,11 @@ export function decodeTextMateToken(decodeMap: DecodeMap, scopes: string[]): str
370403
sameAsPrev = false;
371404
}
372405

373-
let tokens = decodeMap.getTokenIds(scope);
406+
let decodedScope = decodeMap.decodeTMScope(scope);
407+
let decodedScopeTokens = decodedScope.tokenIds;
374408
prevScopeTokensMaps = prevScopeTokensMaps.slice(0);
375-
for (let i = 0; i < tokens.length; i++) {
376-
prevScopeTokensMaps[tokens[i]] = true;
409+
for (let i = 0, len = decodedScopeTokens.length; i < len; i++) {
410+
prevScopeTokensMaps[decodedScopeTokens[i]] = true;
377411
}
378412
scopeTokensMaps[level] = prevScopeTokensMaps;
379413
}

src/vs/editor/test/node/textMate/TMSyntax.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ suite('textMate', () => {
9898
}
9999

100100
function testDecodeTextMateToken(input: string[][], expected: string[]): void {
101-
let decodeMap = new DecodeMap();
101+
let decodeMap = new DecodeMap(new TMScopeRegistry());
102102

103103
for (let i = 0; i < input.length; i++) {
104104
testOneDecodeTextMateToken(decodeMap, input[i], expected[i]);

0 commit comments

Comments
 (0)