Skip to content

Commit 61246fb

Browse files
committed
Optimize decodeTextMateToken (microsoft#4828)
1 parent e25312d commit 61246fb

2 files changed

Lines changed: 203 additions & 43 deletions

File tree

src/vs/editor/node/textMate/TMSyntax.ts

Lines changed: 92 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import {ILineTokens, IMode, IToken, ITokenizationSupport} from 'vs/editor/common
1212
import {TMState} from 'vs/editor/common/modes/TMState';
1313
import {Token} from 'vs/editor/common/modes/supports';
1414
import {IModeService} from 'vs/editor/common/services/modeService';
15-
import {IGrammar, ITMToken, Registry} from 'vscode-textmate';
15+
import {IGrammar, Registry} from 'vscode-textmate';
1616

1717
export interface ITMSyntaxExtensionPoint {
1818
language: string;
@@ -126,15 +126,78 @@ function createTokenizationSupport(mode: IMode, grammar: IGrammar): ITokenizatio
126126
};
127127
}
128128

129+
/**
 * Interns the dot-separated segments of scope names as small integer ids and
 * turns a set of ids back into a token type string.
 *
 * Ids are assigned sequentially starting at 1; slot 0 of `tokenIdToToken` is a
 * placeholder and is never read. The instance also remembers the decode data
 * of the most recently decoded token in `prevToken`.
 */
export class DecodeMap {
    _decodeMapTrait: void;

    /** Highest id handed out so far (0 while no id has been assigned). */
    lastAssignedId: number;
    /** Cache: full scope string -> ids of its segments, in segment order. */
    scopeToTokenIds: { [scope:string]:number[]; };
    /** Segment text -> id. */
    tokenToTokenId: { [token:string]:number; };
    /** Id -> segment text followed by a single space. */
    tokenIdToToken: string[];
    /** Decode data of the previously decoded token (initially empty). */
    prevToken: TMTokenDecodeData;

    constructor() {
        this.lastAssignedId = 0;
        // Prototype-less objects so that segments such as "constructor" cannot
        // collide with inherited Object properties.
        this.scopeToTokenIds = Object.create(null);
        this.tokenToTokenId = Object.create(null);
        this.tokenIdToToken = [null];
        this.prevToken = new TMTokenDecodeData([], []);
    }

    /**
     * Returns the ids of the '.'-separated segments of `scope`, assigning a new
     * id to every segment that has not been seen before.
     *
     * The result is cached per scope string and the cached array instance is
     * returned on subsequent calls, so callers must not mutate it.
     *
     * @param scope A scope name such as `comment.line`.
     * @returns One id per segment, in the order the segments appear.
     */
    public getTokenIds(scope:string): number[] {
        let tokens = this.scopeToTokenIds[scope];
        if (tokens) {
            return tokens;
        }
        let tmpTokens = scope.split('.');

        tokens = [];
        for (let i = 0; i < tmpTokens.length; i++) {
            let token = tmpTokens[i];
            let tokenId = this.tokenToTokenId[token];
            // Ids start at 1, so a falsy value always means "not assigned yet".
            if (!tokenId) {
                tokenId = (++this.lastAssignedId);
                this.tokenToTokenId[token] = tokenId;
                this.tokenIdToToken[tokenId] = token + ' ';
            }
            tokens.push(tokenId);
        }

        this.scopeToTokenIds[scope] = tokens;
        return tokens;
    }

    /**
     * Builds the token type string for a set of segment ids.
     *
     * @param tokenMap Sparse array indexed by id; a truthy entry selects that id.
     * @returns The selected segments concatenated in ascending id order, each
     *          followed by a single space (so a non-empty result ends with a
     *          space); the empty string when nothing is selected.
     */
    public getToken(tokenMap:boolean[]): string {
        let result = '';
        // `lastAssignedId` is itself a valid id (ids come from `++lastAssignedId`),
        // so the upper bound must be inclusive or the newest segment is dropped.
        for (let i = 1; i <= this.lastAssignedId; i++) {
            if (tokenMap[i]) {
                result += this.tokenIdToToken[i];
            }
        }
        return result;
    }
}

/**
 * Plain holder for the decode data of one token: the scopes it was decoded
 * from and, per scope level, the id map that was computed for that level.
 */
export class TMTokenDecodeData {
    _tmTokenDecodeDataTrait: void;

    /** Scope names of the token, as passed to the constructor. */
    public scopes: string[];
    /** Id maps indexed by scope level, as passed to the constructor. */
    public scopeTokensMaps: boolean[][];

    constructor(scopes:string[], scopeTokensMaps:boolean[][]) {
        this.scopes = scopes;
        this.scopeTokensMaps = scopeTokensMaps;
    }
}
130191

131192
class Tokenizer {
132193
private _grammar: IGrammar;
133194
private _modeId: string;
195+
private _decodeMap: DecodeMap;
134196

135197
constructor(modeId:string, grammar: IGrammar) {
136198
this._modeId = modeId;
137199
this._grammar = grammar;
200+
this._decodeMap = new DecodeMap();
138201
}
139202

140203
public tokenize(line: string, state: TMState, offsetDelta: number = 0, stopAtOffset?: number): ILineTokens {
@@ -165,61 +228,47 @@ class Tokenizer {
165228
for (let tokenIndex = 0, len = textMateResult.tokens.length; tokenIndex < len; tokenIndex++) {
166229
let token = textMateResult.tokens[tokenIndex];
167230
let tokenStartIndex = token.startIndex;
168-
let t = decodeTextMateToken(this._modeId, token);
231+
let tokenType = decodeTextMateToken(this._decodeMap, token.scopes);
169232

170233
// do not push a new token if the type is exactly the same (also helps with ligatures)
171-
if (t.tokenType !== lastTokenType) {
172-
ret.tokens.push(new Token(tokenStartIndex + offsetDelta, t.tokenType));
173-
lastTokenType = t.tokenType;
234+
if (tokenType !== lastTokenType) {
235+
ret.tokens.push(new Token(tokenStartIndex + offsetDelta, tokenType));
236+
lastTokenType = tokenType;
174237
}
175238
}
176239

177240
return ret;
178241
}
179242
}
180243

181-
function decodeTextMateToken(modeId:string, entry: ITMToken) {
182-
let tokenTypeArray: string[] = [];
183-
for (let level = 1 /* deliberately skip scope 0*/; level < entry.scopes.length; ++level) {
184-
tokenTypeArray = tokenTypeArray.concat(entry.scopes[level].split('.'));
185-
}
186-
let modeToken = '';
187-
if (entry.scopes.length > 0) {
188-
let dotIndex = entry.scopes[0].lastIndexOf('.');
189-
if (dotIndex >= 0) {
190-
modeToken = entry.scopes[0].substr(dotIndex + 1);
191-
}
192-
}
193-
let tokenTypes: string[] = [];
194-
dedupTokens(tokenTypeArray, modeToken, tokenTypes);
244+
/**
 * Computes the token type string for a token's scope stack.
 *
 * Scope level 0 is skipped. For every further level a map of segment ids is
 * built that contains the ids of that level's scope plus everything from the
 * levels before it. As long as the leading scopes are identical to those of
 * the previously decoded token (`decodeMap.prevToken`), the maps stored for
 * that token are reused instead of being rebuilt.
 *
 * Side effect: `decodeMap.prevToken` is replaced with the data of this token.
 *
 * @param decodeMap Provides the segment ids, the previous token's data and the
 *                  final id-map-to-string conversion.
 * @param scopes The token's scope names; index 0 is ignored.
 * @returns The result of `decodeMap.getToken` for the map of the last level
 *          (an empty map when `scopes` has no level beyond 0).
 */
export function decodeTextMateToken(decodeMap: DecodeMap, scopes: string[]): string {
245+
const prevTokenScopes = decodeMap.prevToken.scopes;
246+
const prevTokenScopesLength = prevTokenScopes.length;
247+
const prevTokenScopeTokensMaps = decodeMap.prevToken.scopeTokensMaps;
195248

196-
return {
197-
tokenType: tokenTypes.join('.'),
198-
modeToken: modeId
199-
};
200-
}
201-
202-
/**
203-
* Remove duplicate entries, collect result in `result`, place `modeToken` at the end
204-
* and detect if this is a comment => return true if it looks like a comment
205-
*/
206-
function dedupTokens(tokenTypeArray:string[], modeToken:string, result:string[]): void {
207-
208-
tokenTypeArray.sort();
209-
210-
var prev:string = null,
211-
curr:string = null;
249+
let scopeTokensMaps: boolean[][] = [];
250+
let prevScopeTokensMaps: boolean[] = [];
251+
let sameAsPrev = true;
252+
for (let level = 1/* deliberately skip scope 0*/; level < scopes.length; level++) {
253+
let scope = scopes[level];
212254

213-
for (var i = 0, len = tokenTypeArray.length; i < len; i++) {
214-
prev = curr;
215-
curr = tokenTypeArray[i];
216-
217-
if (curr === prev || curr === modeToken) {
218-
continue;
255+
// Fast path: while every level so far matched the previous token, reuse its stored map.
if (sameAsPrev) {
256+
if (level < prevTokenScopesLength && prevTokenScopes[level] === scope) {
257+
prevScopeTokensMaps = prevTokenScopeTokensMaps[level];
258+
scopeTokensMaps[level] = prevScopeTokensMaps;
259+
continue;
260+
}
261+
// First mismatch: every remaining level has to be rebuilt.
sameAsPrev = false;
219262
}
220263

221-
result.push(curr);
264+
let tokens = decodeMap.getTokenIds(scope);
265+
// Copy before adding ids so the map of the level before (possibly shared with the previous token) is not modified.
prevScopeTokensMaps = prevScopeTokensMaps.slice(0);
266+
for (let i = 0; i < tokens.length; i++) {
267+
prevScopeTokensMaps[tokens[i]] = true;
268+
}
269+
scopeTokensMaps[level] = prevScopeTokensMaps;
222270
}
223271

224-
result.push(modeToken);
272+
// Remember this token's scopes and maps for the next call.
decodeMap.prevToken = new TMTokenDecodeData(scopes, scopeTokensMaps);
273+
return decodeMap.getToken(prevScopeTokensMaps);
225274
}

0 commit comments

Comments
 (0)