Skip to content

Commit a5a583e

Browse files
committed
Skip tokenizing with TM lines over 20k chars
1 parent 88c45c4 commit a5a583e

1 file changed

Lines changed: 20 additions & 4 deletions

File tree

src/vs/editor/electron-browser/textMate/TMSyntax.ts

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import { ITextMateService } from 'vs/editor/node/textMate/textMateService';
2020
import { grammarsExtPoint, IEmbeddedLanguagesMap, ITMSyntaxExtensionPoint } from 'vs/editor/node/textMate/TMGrammars';
2121
import { TokenizationResult, TokenizationResult2 } from 'vs/editor/common/core/token';
2222
import { TokenMetadata } from 'vs/editor/common/model/tokensBinaryEncoding';
23+
import { nullTokenize2 } from 'vs/editor/common/modes/nullMode';
2324

2425
export class TMScopeRegistry {
2526

@@ -88,6 +89,12 @@ export class TMLanguageRegistration {
8889
}
8990
}
9091

92+
/**
 * Value that `_createGrammar` resolves its promise with: the loaded grammar
 * plus the language information gathered while loading it.
 */
interface ICreateGrammarResult {
93+
/** Language id obtained from the mode service's language identifier for the mode. */
languageId: LanguageId;
94+
/** Grammar handed back by the grammar registry's load callback. */
grammar: IGrammar;
95+
/** True when at least one embedded language was resolved for the language registration. */
containsEmbeddedLanguages: boolean;
96+
}
97+
9198
export class MainProcessTextMateSyntax implements ITextMateService {
9299
public _serviceBrand: any;
93100

@@ -226,18 +233,19 @@ export class MainProcessTextMateSyntax implements ITextMateService {
226233
return this._createGrammar(modeId).then(r => r.grammar);
227234
}
228235

229-
private _createGrammar(modeId: string): TPromise<{ grammar: IGrammar; containsEmbeddedLanguages: boolean; }> {
236+
private _createGrammar(modeId: string): TPromise<ICreateGrammarResult> {
230237
let scopeName = this._languageToScope[modeId];
231238
let languageRegistration = this._scopeRegistry.getLanguageRegistration(scopeName);
232239
let embeddedLanguages = this._resolveEmbeddedLanguages(languageRegistration.embeddedLanguages);
233240
let languageId = this._modeService.getLanguageIdentifier(modeId).id;
234241
let containsEmbeddedLanguages = (Object.keys(embeddedLanguages).length > 0);
235-
return new TPromise<{ grammar: IGrammar; containsEmbeddedLanguages: boolean; }>((c, e, p) => {
242+
return new TPromise<ICreateGrammarResult>((c, e, p) => {
236243
this._grammarRegistry.loadGrammarWithEmbeddedLanguages(scopeName, languageId, embeddedLanguages, (err, grammar) => {
237244
if (err) {
238245
return e(err);
239246
}
240247
c({
248+
languageId: languageId,
241249
grammar: grammar,
242250
containsEmbeddedLanguages: containsEmbeddedLanguages
243251
});
@@ -247,20 +255,22 @@ export class MainProcessTextMateSyntax implements ITextMateService {
247255

248256
private registerDefinition(modeId: string): void {
249257
this._createGrammar(modeId).then((r) => {
250-
TokenizationRegistry.register(modeId, new TMTokenization(this._scopeRegistry, r.grammar, r.containsEmbeddedLanguages));
258+
TokenizationRegistry.register(modeId, new TMTokenization(this._scopeRegistry, r.languageId, r.grammar, r.containsEmbeddedLanguages));
251259
}, onUnexpectedError);
252260
}
253261
}
254262

255263
class TMTokenization implements ITokenizationSupport {
256264

257265
private readonly _scopeRegistry: TMScopeRegistry;
266+
private readonly _languageId: LanguageId;
258267
private readonly _grammar: IGrammar;
259268
private readonly _containsEmbeddedLanguages: boolean;
260269
private readonly _seenLanguages: boolean[];
261270

262-
constructor(scopeRegistry: TMScopeRegistry, grammar: IGrammar, containsEmbeddedLanguages: boolean) {
271+
constructor(scopeRegistry: TMScopeRegistry, languageId: LanguageId, grammar: IGrammar, containsEmbeddedLanguages: boolean) {
263272
this._scopeRegistry = scopeRegistry;
273+
this._languageId = languageId;
264274
this._grammar = grammar;
265275
this._containsEmbeddedLanguages = containsEmbeddedLanguages;
266276
this._seenLanguages = [];
@@ -279,6 +289,12 @@ class TMTokenization implements ITokenizationSupport {
279289
throw new Error('Unexpected: offsetDelta should be 0.');
280290
}
281291

292+
// Do not attempt to tokenize a line that has 20k characters or more
293+
if (line.length >= 20000) {
294+
console.log(`Line (${line.substr(0, 15)}...): longer than 20k characters, tokenization skipped.`);
295+
return nullTokenize2(this._languageId, line, state, offsetDelta);
296+
}
297+
282298
let textMateResult = this._grammar.tokenizeLine2(line, state);
283299

284300
if (this._containsEmbeddedLanguages) {

0 commit comments

Comments
 (0)