Skip to content

Commit 86b2e52

Browse files
committed
Generate mode transitions from TM tokenizer (microsoft#14136)
1 parent 8a45895 commit 86b2e52

3 files changed

Lines changed: 141 additions & 91 deletions

File tree

src/vs/editor/common/model/textModelWithTokens.ts

Lines changed: 3 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -296,15 +296,9 @@ export class TextModelWithTokens extends TextModel implements editorCommon.IToke
296296
var lineNumber = validPosition.lineNumber;
297297
var column = validPosition.column;
298298

299-
if (column === 1) {
300-
return this.getStateBeforeLine(lineNumber).getModeId();
301-
} else if (column === this.getLineMaxColumn(lineNumber)) {
302-
return this.getStateAfterLine(lineNumber).getModeId();
303-
} else {
304-
var modeTransitions = this._getLineModeTransitions(lineNumber);
305-
var modeTransitionIndex = ModeTransition.findIndexInSegmentsArray(modeTransitions, column - 1);
306-
return modeTransitions[modeTransitionIndex].modeId;
307-
}
299+
var modeTransitions = this._getLineModeTransitions(lineNumber);
300+
var modeTransitionIndex = ModeTransition.findIndexInSegmentsArray(modeTransitions, column - 1);
301+
return modeTransitions[modeTransitionIndex].modeId;
308302
}
309303

310304
protected _invalidateLine(lineIndex: number): void {
@@ -400,20 +394,6 @@ export class TextModelWithTokens extends TextModel implements editorCommon.IToke
400394
});
401395
}
402396

403-
private getStateBeforeLine(lineNumber: number): IState {
404-
this._withModelTokensChangedEventBuilder((eventBuilder) => {
405-
this._updateTokensUntilLine(eventBuilder, lineNumber - 1, true);
406-
});
407-
return this._lines[lineNumber - 1].getState();
408-
}
409-
410-
private getStateAfterLine(lineNumber: number): IState {
411-
this._withModelTokensChangedEventBuilder((eventBuilder) => {
412-
this._updateTokensUntilLine(eventBuilder, lineNumber, true);
413-
});
414-
return lineNumber < this._lines.length ? this._lines[lineNumber].getState() : this._lastState;
415-
}
416-
417397
_getLineModeTransitions(lineNumber: number): ModeTransition[] {
418398
if (lineNumber < 1 || lineNumber > this.getLineCount()) {
419399
throw new Error('Illegal value ' + lineNumber + ' for `lineNumber`');

src/vs/editor/node/textMate/TMSyntax.ts

Lines changed: 26 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -266,7 +266,7 @@ export class TMScopesDecodeData {
266266
/**
267267
* The resolved language.
268268
*/
269-
private readonly language: string;
269+
public readonly language: string;
270270

271271
constructor(parent: TMScopesDecodeData, scope: TMScopeDecodeData) {
272272
// 1) Inherit data from `parent`.
@@ -345,7 +345,7 @@ export class DecodeMap {
345345
public getToken(tokenMap: boolean[]): string {
346346
let result = '';
347347
let isFirst = true;
348-
for (let i = 1; i <= this.lastAssignedTokenId; i++) {
348+
for (let i = 1, len = tokenMap.length; i < len; i++) {
349349
if (tokenMap[i]) {
350350
if (isFirst) {
351351
isFirst = false;
@@ -400,34 +400,53 @@ class Tokenizer {
400400
}
401401

402402
export function decodeTextMateTokens(line: string, offsetDelta: number, decodeMap: DecodeMap, resultTokens: IToken[], resultState: TMState): LineTokens {
403+
const topLevelModeId = resultState.getModeId();
404+
403405
// Create the result early and fill in the tokens later
404406
let tokens: Token[] = [];
407+
let modeTransitions: ModeTransition[] = [];
405408

406409
let lastTokenType: string = null;
410+
let lastModeId: string = null;
411+
407412
for (let tokenIndex = 0, len = resultTokens.length; tokenIndex < len; tokenIndex++) {
408413
let token = resultTokens[tokenIndex];
409414
let tokenStartIndex = token.startIndex;
410-
let tokenType = decodeTextMateToken(decodeMap, token.scopes);
415+
416+
let tokenType = '';
417+
let tokenModeId = topLevelModeId;
418+
let decodedToken = decodeTextMateToken(decodeMap, token.scopes);
419+
if (decodedToken) {
420+
tokenType = decodeMap.getToken(decodedToken.tokensMask);
421+
if (decodedToken.language) {
422+
tokenModeId = decodedToken.language;
423+
}
424+
}
411425

412426
// do not push a new token if the type is exactly the same (also helps with ligatures)
413427
if (tokenType !== lastTokenType) {
414428
tokens.push(new Token(tokenStartIndex + offsetDelta, tokenType));
415429
lastTokenType = tokenType;
416430
}
431+
432+
if (tokenModeId !== lastModeId) {
433+
modeTransitions.push(new ModeTransition(tokenStartIndex + offsetDelta, tokenModeId));
434+
lastModeId = tokenModeId;
435+
}
417436
}
418437

419438
return new LineTokens(
420439
tokens,
421-
[new ModeTransition(offsetDelta, resultState.getModeId())],
440+
modeTransitions,
422441
offsetDelta + line.length,
423442
resultState
424443
);
425444
}
426445

427-
export function decodeTextMateToken(decodeMap: DecodeMap, scopes: string[]): string {
446+
export function decodeTextMateToken(decodeMap: DecodeMap, scopes: string[]): TMScopesDecodeData {
428447
if (scopes.length <= 1) {
429448
// fast case
430-
return '';
449+
return null;
431450
}
432451

433452
const prevTokenScopes = decodeMap.prevTokenScopes;
@@ -456,5 +475,5 @@ export function decodeTextMateToken(decodeMap: DecodeMap, scopes: string[]): str
456475
}
457476

458477
decodeMap.prevTokenScopes = resultScopes;
459-
return decodeMap.getToken(lastResultScope.tokensMask);
478+
return lastResultScope;
460479
}

src/vs/editor/test/node/textMate/TMSyntax.test.ts

Lines changed: 112 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -11,69 +11,109 @@ import { TMState } from 'vs/editor/common/modes/TMState';
1111
suite('TextMate.TMScopeRegistry', () => {
1212

1313
test('getFilePath', () => {
14-
let manager = new TMScopeRegistry();
15-
16-
manager.register('a', 'source.a', './grammar/a.tmLanguage');
17-
assert.equal(manager.getFilePath('source.a'), './grammar/a.tmLanguage');
18-
assert.equal(manager.getFilePath('a'), null);
19-
assert.equal(manager.getFilePath('source.b'), null);
20-
assert.equal(manager.getFilePath('b'), null);
21-
22-
manager.register('b', 'source.b', './grammar/b.tmLanguage');
23-
assert.equal(manager.getFilePath('source.a'), './grammar/a.tmLanguage');
24-
assert.equal(manager.getFilePath('a'), null);
25-
assert.equal(manager.getFilePath('source.b'), './grammar/b.tmLanguage');
26-
assert.equal(manager.getFilePath('b'), null);
27-
28-
manager.register('a', 'source.a', './grammar/ax.tmLanguage');
29-
assert.equal(manager.getFilePath('source.a'), './grammar/ax.tmLanguage');
30-
assert.equal(manager.getFilePath('a'), null);
31-
assert.equal(manager.getFilePath('source.b'), './grammar/b.tmLanguage');
32-
assert.equal(manager.getFilePath('b'), null);
14+
let registry = new TMScopeRegistry();
15+
16+
registry.register('a', 'source.a', './grammar/a.tmLanguage');
17+
assert.equal(registry.getFilePath('source.a'), './grammar/a.tmLanguage');
18+
assert.equal(registry.getFilePath('a'), null);
19+
assert.equal(registry.getFilePath('source.b'), null);
20+
assert.equal(registry.getFilePath('b'), null);
21+
22+
registry.register('b', 'source.b', './grammar/b.tmLanguage');
23+
assert.equal(registry.getFilePath('source.a'), './grammar/a.tmLanguage');
24+
assert.equal(registry.getFilePath('a'), null);
25+
assert.equal(registry.getFilePath('source.b'), './grammar/b.tmLanguage');
26+
assert.equal(registry.getFilePath('b'), null);
27+
28+
registry.register('a', 'source.a', './grammar/ax.tmLanguage');
29+
assert.equal(registry.getFilePath('source.a'), './grammar/ax.tmLanguage');
30+
assert.equal(registry.getFilePath('a'), null);
31+
assert.equal(registry.getFilePath('source.b'), './grammar/b.tmLanguage');
32+
assert.equal(registry.getFilePath('b'), null);
3333
});
3434

3535
test('scopeToLanguage', () => {
36-
let manager = new TMScopeRegistry();
36+
let registry = new TMScopeRegistry();
3737

38-
assert.equal(manager.scopeToLanguage('source.html'), null);
38+
assert.equal(registry.scopeToLanguage('source.html'), null);
3939

40-
manager.register('html', 'source.html', null);
41-
manager.register('c', 'source.c', null);
42-
manager.register('css', 'source.css', null);
43-
manager.register('javascript', 'source.js', null);
44-
manager.register('python', 'source.python', null);
45-
manager.register('smarty', 'source.smarty', null);
46-
manager.register(null, 'source.baz', null);
40+
registry.register('html', 'source.html', null);
41+
registry.register('c', 'source.c', null);
42+
registry.register('css', 'source.css', null);
43+
registry.register('javascript', 'source.js', null);
44+
registry.register('python', 'source.python', null);
45+
registry.register('smarty', 'source.smarty', null);
46+
registry.register(null, 'source.baz', null);
4747

4848
// exact matches
49-
assert.equal(manager.scopeToLanguage('source.html'), 'html');
50-
assert.equal(manager.scopeToLanguage('source.css'), 'css');
51-
assert.equal(manager.scopeToLanguage('source.c'), 'c');
52-
assert.equal(manager.scopeToLanguage('source.js'), 'javascript');
53-
assert.equal(manager.scopeToLanguage('source.python'), 'python');
54-
assert.equal(manager.scopeToLanguage('source.smarty'), 'smarty');
49+
assert.equal(registry.scopeToLanguage('source.html'), 'html');
50+
assert.equal(registry.scopeToLanguage('source.css'), 'css');
51+
assert.equal(registry.scopeToLanguage('source.c'), 'c');
52+
assert.equal(registry.scopeToLanguage('source.js'), 'javascript');
53+
assert.equal(registry.scopeToLanguage('source.python'), 'python');
54+
assert.equal(registry.scopeToLanguage('source.smarty'), 'smarty');
5555

5656
// prefix matches
57-
assert.equal(manager.scopeToLanguage('source.css.embedded.html'), 'css');
58-
assert.equal(manager.scopeToLanguage('source.js.embedded.html'), 'javascript');
59-
assert.equal(manager.scopeToLanguage('source.python.embedded.html'), 'python');
60-
assert.equal(manager.scopeToLanguage('source.smarty.embedded.html'), 'smarty');
57+
assert.equal(registry.scopeToLanguage('source.css.embedded.html'), 'css');
58+
assert.equal(registry.scopeToLanguage('source.js.embedded.html'), 'javascript');
59+
assert.equal(registry.scopeToLanguage('source.python.embedded.html'), 'python');
60+
assert.equal(registry.scopeToLanguage('source.smarty.embedded.html'), 'smarty');
6161

6262
// misses
63-
assert.equal(manager.scopeToLanguage('source.ts'), null);
64-
assert.equal(manager.scopeToLanguage('source.csss'), null);
65-
assert.equal(manager.scopeToLanguage('source.baz'), null);
66-
assert.equal(manager.scopeToLanguage('asource.css'), null);
67-
assert.equal(manager.scopeToLanguage('a.source.css'), null);
68-
assert.equal(manager.scopeToLanguage('source_css'), null);
69-
assert.equal(manager.scopeToLanguage('punctuation.definition.tag.html'), null);
63+
assert.equal(registry.scopeToLanguage('source.ts'), null);
64+
assert.equal(registry.scopeToLanguage('source.csss'), null);
65+
assert.equal(registry.scopeToLanguage('source.baz'), null);
66+
assert.equal(registry.scopeToLanguage('asource.css'), null);
67+
assert.equal(registry.scopeToLanguage('a.source.css'), null);
68+
assert.equal(registry.scopeToLanguage('source_css'), null);
69+
assert.equal(registry.scopeToLanguage('punctuation.definition.tag.html'), null);
7070
});
7171

7272
});
7373

7474
suite('TextMate.decodeTextMateTokens', () => {
7575

76-
test('html and embedded modes', () => {
76+
test('embedded modes', () => {
77+
let registry = new TMScopeRegistry();
78+
79+
registry.register('html', 'source.html', null);
80+
registry.register('c', 'source.c', null);
81+
registry.register('css', 'source.css', null);
82+
registry.register('javascript', 'source.js', null);
83+
registry.register('python', 'source.python', null);
84+
registry.register('smarty', 'source.smarty', null);
85+
registry.register(null, 'source.baz', null);
86+
87+
let decodeMap = new DecodeMap(registry);
88+
let actual = decodeTextMateTokens(
89+
'text<style>body{}</style><script>var x=3;</script>text',
90+
0,
91+
decodeMap,
92+
[
93+
{ startIndex: 0, endIndex: 4, scopes: ['source.html'] },
94+
{ startIndex: 4, endIndex: 11, scopes: ['source.html', 'style.tag.open'] },
95+
{ startIndex: 11, endIndex: 17, scopes: ['source.html', 'source.css'] },
96+
{ startIndex: 17, endIndex: 25, scopes: ['source.html', 'style.tag.close'] },
97+
{ startIndex: 25, endIndex: 33, scopes: ['source.html', 'script.tag.open'] },
98+
{ startIndex: 33, endIndex: 41, scopes: ['source.html', 'source.js'] },
99+
{ startIndex: 41, endIndex: 50, scopes: ['source.html', 'script.tag.close'] },
100+
{ startIndex: 50, endIndex: 54, scopes: ['source.html'] },
101+
],
102+
new TMState('html', null, null)
103+
);
104+
105+
let actualModeTransitions = actual.modeTransitions.map((t) => { return { startIndex: t.startIndex, modeId: t.modeId }; });
106+
107+
assert.deepEqual(actualModeTransitions, [
108+
{ startIndex: 0, modeId: 'html' },
109+
{ startIndex: 11, modeId: 'css' },
110+
{ startIndex: 17, modeId: 'html' },
111+
{ startIndex: 33, modeId: 'javascript' },
112+
{ startIndex: 41, modeId: 'html' },
113+
]);
114+
});
115+
116+
test('html and embedded', () => {
77117

78118
var tests = [
79119
{
@@ -233,7 +273,7 @@ suite('TextMate.decodeTextMateTokens', () => {
233273
{ startIndex: 22, type: 'html.punctuation.definition.end.string.quoted.double.source.css.embedded' },
234274
{ startIndex: 23, type: 'tag.html.punctuation.definition.source.css.embedded' },
235275
],
236-
modeTransitions: [{ startIndex: 0, modeId: 'html' }]
276+
modeTransitions: [{ startIndex: 0, modeId: 'css' }]
237277
}, {
238278
line: '\t\th1 {',
239279
tmTokens: [
@@ -248,7 +288,7 @@ suite('TextMate.decodeTextMateTokens', () => {
248288
{ startIndex: 4, type: 'meta.html.source.css.embedded.selector' },
249289
{ startIndex: 5, type: 'meta.html.punctuation.begin.source.css.embedded.property-list.section' },
250290
],
251-
modeTransitions: [{ startIndex: 0, modeId: 'html' }]
291+
modeTransitions: [{ startIndex: 0, modeId: 'css' }]
252292
}, {
253293
line: '\t\t\tcolor: #CCA3A3;',
254294
tmTokens: [
@@ -269,7 +309,7 @@ suite('TextMate.decodeTextMateTokens', () => {
269309
{ startIndex: 11, type: 'meta.html.other.source.css.embedded.property-list.property-value.constant.color.rgb-value' },
270310
{ startIndex: 17, type: 'meta.html.punctuation.source.css.embedded.property-list.property-value.terminator.rule' },
271311
],
272-
modeTransitions: [{ startIndex: 0, modeId: 'html' }]
312+
modeTransitions: [{ startIndex: 0, modeId: 'css' }]
273313
}, {
274314
line: '\t\t}',
275315
tmTokens: [
@@ -280,7 +320,7 @@ suite('TextMate.decodeTextMateTokens', () => {
280320
{ startIndex: 0, type: 'meta.html.source.css.embedded.property-list' },
281321
{ startIndex: 2, type: 'meta.html.punctuation.end.source.css.embedded.property-list.section' },
282322
],
283-
modeTransitions: [{ startIndex: 0, modeId: 'html' }]
323+
modeTransitions: [{ startIndex: 0, modeId: 'css' }]
284324
}, {
285325
line: '\t</style>',
286326
tmTokens: [
@@ -295,7 +335,7 @@ suite('TextMate.decodeTextMateTokens', () => {
295335
{ startIndex: 3, type: 'tag.html.entity.name.source.css.embedded.style' },
296336
{ startIndex: 8, type: 'tag.html.punctuation.definition.source.css.embedded' },
297337
],
298-
modeTransitions: [{ startIndex: 0, modeId: 'html' }]
338+
modeTransitions: [{ startIndex: 0, modeId: 'css' }]
299339
}, {
300340
line: '\t<script type=\"text/javascript\">',
301341
tmTokens: [
@@ -322,7 +362,7 @@ suite('TextMate.decodeTextMateTokens', () => {
322362
{ startIndex: 30, type: 'html.punctuation.definition.end.string.quoted.double.source.embedded.js' },
323363
{ startIndex: 31, type: 'tag.html.punctuation.definition.source.embedded.js' },
324364
],
325-
modeTransitions: [{ startIndex: 0, modeId: 'html' }]
365+
modeTransitions: [{ startIndex: 0, modeId: 'javascript' }]
326366
}, {
327367
line: '\t\twindow.alert(\"I am a sample...\");',
328368
tmTokens: [
@@ -349,22 +389,24 @@ suite('TextMate.decodeTextMateTokens', () => {
349389
{ startIndex: 33, type: 'meta.html.source.embedded.js.brace.round' },
350390
{ startIndex: 34, type: 'html.punctuation.source.embedded.terminator.js.statement' },
351391
],
352-
modeTransitions: [{ startIndex: 0, modeId: 'html' }]
392+
modeTransitions: [{ startIndex: 0, modeId: 'javascript' }]
353393
}, {
354-
line: '\t</script>',
394+
line: '\t</script>After',
355395
tmTokens: [
356396
{ startIndex: 0, endIndex: 1, scopes: ['text.html.basic', 'source.js.embedded.html'] },
357397
{ startIndex: 1, endIndex: 3, scopes: ['text.html.basic', 'source.js.embedded.html', 'punctuation.definition.tag.html'] },
358398
{ startIndex: 3, endIndex: 9, scopes: ['text.html.basic', 'source.js.embedded.html', 'entity.name.tag.script.html'] },
359-
{ startIndex: 9, endIndex: 10, scopes: ['text.html.basic', 'source.js.embedded.html', 'punctuation.definition.tag.html'] }
399+
{ startIndex: 9, endIndex: 10, scopes: ['text.html.basic', 'source.js.embedded.html', 'punctuation.definition.tag.html'] },
400+
{ startIndex: 10, endIndex: 15, scopes: ['text.html.basic'] }
360401
],
361402
tokens: [
362403
{ startIndex: 0, type: 'html.source.embedded.js' },
363404
{ startIndex: 1, type: 'tag.html.punctuation.definition.source.embedded.js' },
364405
{ startIndex: 3, type: 'tag.html.entity.name.source.embedded.js.script' },
365406
{ startIndex: 9, type: 'tag.html.punctuation.definition.source.embedded.js' },
407+
{ startIndex: 10, type: '' },
366408
],
367-
modeTransitions: [{ startIndex: 0, modeId: 'html' }]
409+
modeTransitions: [{ startIndex: 0, modeId: 'javascript' }, { startIndex: 10, modeId: 'html' }]
368410
}, {
369411
line: '</head>',
370412
tmTokens: [
@@ -486,15 +528,23 @@ suite('TextMate.decodeTextMateTokens', () => {
486528
}
487529
];
488530

489-
let decodeMap = new DecodeMap(new TMScopeRegistry());
531+
let registry = new TMScopeRegistry();
532+
533+
registry.register('html', 'source.html', null);
534+
registry.register('c', 'source.c', null);
535+
registry.register('css', 'source.css', null);
536+
registry.register('javascript', 'source.js', null);
537+
registry.register('python', 'source.python', null);
538+
registry.register('smarty', 'source.smarty', null);
539+
registry.register(null, 'source.baz', null);
490540

491-
let state = new TMState('html', null, null);
541+
let decodeMap = new DecodeMap(registry);
492542

493543
for (let i = 0, len = tests.length; i < len; i++) {
494544
let test = tests[i];
495-
let actual = decodeTextMateTokens(test.line, 0, decodeMap, test.tmTokens, state);
545+
let actual = decodeTextMateTokens(test.line, 0, decodeMap, test.tmTokens, new TMState('html', null, null));
496546

497-
let actualTokens = actual.tokens.map((t) => { return { startIndex: t.startIndex, type: t.type}; });
547+
let actualTokens = actual.tokens.map((t) => { return { startIndex: t.startIndex, type: t.type }; });
498548
let actualModeTransitions = actual.modeTransitions.map((t) => { return { startIndex: t.startIndex, modeId: t.modeId }; });
499549

500550
assert.deepEqual(actualTokens, test.tokens, 'test ' + test.line);
@@ -526,7 +576,8 @@ suite('textMate', () => {
526576
}
527577

528578
function testOneDecodeTextMateToken(decodeMap: DecodeMap, scopes: string[], expected: string): void {
529-
let actual = decodeTextMateToken(decodeMap, scopes);
579+
let actualDecodedToken = decodeTextMateToken(decodeMap, scopes);
580+
let actual = actualDecodedToken ? decodeMap.getToken(actualDecodedToken.tokensMask) : '';
530581
assert.equal(actual, expected);
531582

532583
// Sanity-check

0 commit comments

Comments
 (0)