Skip to content

Commit 3b45a32

Browse files
committed
Add tokenization API
1 parent 3767bd1 commit 3b45a32

4 files changed

Lines changed: 69 additions & 2 deletions

File tree

src/vs/editor/browser/standalone/standaloneEditor.ts

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ import { IContextKeyService } from 'vs/platform/contextkey/common/contextkey';
3333
import { ICodeEditorService } from 'vs/editor/common/services/codeEditorService';
3434
import { IEditorWorkerService } from 'vs/editor/common/services/editorWorkerService';
3535
import { ITextModelResolverService } from 'vs/editor/common/services/resolverService';
36+
import { IState, ITokenizationSupport, TokenizationRegistry } from 'vs/editor/common/modes';
37+
import { NULL_STATE, nullTokenize } from 'vs/editor/common/modes/nullMode';
3638

3739
/**
3840
* @internal
@@ -255,6 +257,53 @@ export function colorizeModelLine(model: IModel, lineNumber: number, tabSize: nu
255257
return Colorizer.colorizeModelLine(model, lineNumber, tabSize);
256258
}
257259

260+
/**
 * A single token produced by the standalone `tokenize` API.
 *
 * `offset` is the start index reported by the tokenizer for this token and
 * `type` is the token type string reported by the tokenizer.
 */
export class Token {
	public readonly offset: number;
	public readonly type: string;

	/**
	 * @param offset Start index of the token, as reported by the tokenizer.
	 * @param type Token type string, as reported by the tokenizer.
	 */
	constructor(offset: number, type: string) {
		this.offset = offset;
		this.type = type;
	}

	/**
	 * Render the token as `(offset, type)` so that logging or string
	 * concatenation does not produce `[object Object]`.
	 */
	public toString(): string {
		return '(' + this.offset + ', ' + this.type + ')';
	}
}
269+
270+
/**
 * Return the tokenization support registered in `TokenizationRegistry` for
 * `languageId`. When nothing is registered, return a fallback support whose
 * initial state is `NULL_STATE` and whose `tokenize` delegates to
 * `nullTokenize`, so callers never have to handle a missing tokenizer.
 *
 * @internal
 */
function getSafeTokenizationSupport(languageId: string): ITokenizationSupport {
	const registered = TokenizationRegistry.get(languageId);
	if (registered) {
		return registered;
	}

	// Nothing registered for this language: build the fallback support.
	return {
		getInitialState: () => NULL_STATE,
		tokenize: (line: string, state: IState, deltaOffset: number) => nullTokenize(languageId, line, state, deltaOffset),
		tokenize3: undefined,
	};
}
284+
285+
/**
 * Tokenize `text` using language `languageId`
 *
 * The text is split on `\r\n`, `\r` or `\n`, and each line is tokenized in
 * order, feeding the end state of one line in as the start state of the next.
 *
 * @param text The text to tokenize; may span multiple lines.
 * @param languageId The language whose tokenization support should be used.
 * @returns One array of tokens per input line, in line order.
 */
export function tokenize(text: string, languageId: string): Token[][] {
	const modeService = StaticServices.modeService.get();
	// Needed in order to get the mode registered for subsequent look-ups
	modeService.getOrCreateMode(languageId);

	const tokenizationSupport = getSafeTokenizationSupport(languageId);
	const lines = text.split(/\r\n|\r|\n/);
	const result: Token[][] = [];

	// Tokenizer state is threaded from one line to the next.
	let state = tokenizationSupport.getInitialState();
	for (let i = 0, len = lines.length; i < len; i++) {
		const tokenizationResult = tokenizationSupport.tokenize(lines[i], state, 0);

		result[i] = tokenizationResult.tokens.map((t) => new Token(t.startIndex, t.type));
		state = tokenizationResult.endState;
	}
	return result;
}
306+
258307
/**
259308
* @internal
260309
*/
@@ -279,6 +328,7 @@ export function createMonacoEditorAPI(): typeof monaco.editor {
279328
colorizeElement: colorizeElement,
280329
colorize: colorize,
281330
colorizeModelLine: colorizeModelLine,
331+
tokenize: tokenize,
282332

283333
// enums
284334
ScrollbarVisibility: ScrollbarVisibility,
@@ -306,6 +356,7 @@ export function createMonacoEditorAPI(): typeof monaco.editor {
306356
BareFontInfo: <any>editorCommon.BareFontInfo,
307357
FontInfo: <any>editorCommon.FontInfo,
308358
TextModelResolvedOptions: <any>editorCommon.TextModelResolvedOptions,
359+
Token: Token,
309360

310361
// vars
311362
EditorType: editorCommon.EditorType,

src/vs/editor/common/modes/monarch/monarchLexer.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,7 @@ export class MonarchTokenizer implements modes.ITokenizationSupport {
451451
if (!rules) {
452452
rules = monarchCommon.findRules(this._lexer, state.stack.state); // do parent matching
453453
if (!rules) {
454-
monarchCommon.throwError(this._lexer, 'tokenizer state is not defined: ' + state);
454+
monarchCommon.throwError(this._lexer, 'tokenizer state is not defined: ' + state.stack.state);
455455
}
456456
}
457457

@@ -485,7 +485,7 @@ export class MonarchTokenizer implements modes.ITokenizationSupport {
485485
}
486486

487487
if (!hasEmbeddedPopRule) {
488-
monarchCommon.throwError(this._lexer, 'no rule containing nextEmbedded: "@pop" in tokenizer embedded state: ' + state);
488+
monarchCommon.throwError(this._lexer, 'no rule containing nextEmbedded: "@pop" in tokenizer embedded state: ' + state.stack.state);
489489
}
490490

491491
return popOffset;

src/vs/editor/common/services/languagesRegistry.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import * as strings from 'vs/base/common/strings';
1111
import { ModesRegistry } from 'vs/editor/common/modes/modesRegistry';
1212
import { ILanguageExtensionPoint } from 'vs/editor/common/services/modeService';
1313
import { LanguageId, LanguageIdentifier } from 'vs/editor/common/modes';
14+
import { NULL_MODE_ID, NULL_LANGUAGE_IDENTIFIER } from 'vs/editor/common/modes/nullMode';
1415

1516
var hasOwnProperty = Object.prototype.hasOwnProperty;
1617

@@ -254,6 +255,10 @@ export class LanguagesRegistry {
254255
}
255256

256257
public getLanguageIdentifier(_modeId: string | LanguageId): LanguageIdentifier {
258+
if (_modeId === NULL_MODE_ID || _modeId === LanguageId.Null) {
259+
return NULL_LANGUAGE_IDENTIFIER;
260+
}
261+
257262
let modeId: string;
258263
if (typeof _modeId === 'string') {
259264
modeId = _modeId;

src/vs/monaco.d.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -853,6 +853,17 @@ declare module monaco.editor {
853853
*/
854854
export function colorizeModelLine(model: IModel, lineNumber: number, tabSize?: number): string;
855855

856+
export class Token {
857+
readonly offset: number;
858+
readonly type: string;
859+
constructor(offset: number, type: string);
860+
}
861+
862+
/**
863+
* Tokenize `text` using language `languageId`
864+
*/
865+
export function tokenize(text: string, languageId: string): Token[][];
866+
856867
/**
857868
* A web worker that can provide a proxy to an arbitrary file.
858869
*/

0 commit comments

Comments
 (0)