@@ -72,9 +72,9 @@ export class TMScopeRegistry {
/**
 * Registers the grammar file that provides `scopeName`.
 *
 * The scope name is always mapped to `filePath`. When `language` is truthy,
 * the scope name is additionally mapped to that language and the cached
 * scopes regex is reset so that it no longer reflects the old mapping.
 */
public register(language: string, scopeName: string, filePath: string): void {
	this._scopeNameToFilePath[scopeName] = filePath;
	if (language) {
		this._scopeNameToLanguage[scopeName] = language;
		// The cached regex was built from the previous set of scopes; drop it.
		this._cachedScopesRegex = null;
	}
}
7979
8080 public getFilePath ( scopeName : string ) : string {
@@ -199,71 +199,104 @@ export class MainProcessTextMateSyntax {
199199 return ;
200200 }
201201
202- TokenizationRegistry . register ( modeId , createTokenizationSupport ( modeId , grammar ) ) ;
202+ TokenizationRegistry . register ( modeId , createTokenizationSupport ( this . _scopeRegistry , modeId , grammar ) ) ;
203203 } ) ;
204204 }
205-
206- /**
207- * Given a produced TM scope, return the language that token describes or null if unknown.
208- * e.g. source.html => html, source.css.embedded.html => css, punctuation.definition.tag.html => null
209- */
210- public scopeToLanguage ( scope : string ) : string {
211- return this . _scopeRegistry . scopeToLanguage ( scope ) ;
212- }
213205}
214206
/**
 * Creates the tokenization support for `modeId`, backed by a single
 * `Tokenizer` built from `scopeRegistry`, `modeId` and `grammar`.
 *
 * The returned object starts from a fresh `TMState` for `modeId` and forwards
 * every `tokenize` call to that tokenizer.
 */
function createTokenizationSupport(scopeRegistry: TMScopeRegistry, modeId: string, grammar: IGrammar): ITokenizationSupport {
	const tokenizer = new Tokenizer(scopeRegistry, modeId, grammar);
	return {
		getInitialState: () => new TMState(modeId, null, null),
		tokenize: (line, state, offsetDelta?, stopAtOffset?) => tokenizer.tokenize(line, <TMState>state, offsetDelta, stopAtOffset)
	};
}
222214
/**
 * Data associated with a TextMate scope as part of decoding.
 *
 * e.g.
 * For a scope "punctuation.definition.string.end.html", the tokens are: punctuation, definition, string, end, html.
 * Each of those tokens receives a unique numeric id, so instead of storing the token strings, we store the token ids.
 * Ultimately this means we store something like [23, 21, 12, 13, 1], considering those numbers to be the ids of the tokens.
 */
export class TMScopeDecodeData {
	_tmScopeDecodeDataBrand: void;

	/**
	 * The original TextMate scope.
	 */
	public readonly scope: string;

	/**
	 * The language this scope belongs to.
	 * e.g. source.html => html, source.css.embedded.html => css, punctuation.definition.tag.html => null
	 */
	public readonly language: string;

	/**
	 * The token ids this scope consists of.
	 */
	public readonly tokenIds: number[];

	/**
	 * @param scope The original TextMate scope.
	 * @param language The language the scope belongs to.
	 * @param tokenIds The ids of the tokens the scope consists of; stored as-is, not copied.
	 */
	constructor(scope: string, language: string, tokenIds: number[]) {
		this.scope = scope;
		this.language = language;
		this.tokenIds = tokenIds;
	}
}
248+
223249export class DecodeMap {
_decodeMapBrand: void;

/** The most recently assigned token id; 0 means no id has been assigned yet. */
private lastAssignedTokenId: number;
/** Registry used to resolve the language of a scope while decoding it. */
private scopeRegistry: TMScopeRegistry;
/** Cache of already decoded scopes, keyed by the scope string. */
private readonly scopeToTokenIds: { [scope: string]: TMScopeDecodeData; };
/** Maps a token string to its numeric id. */
private readonly tokenToTokenId: { [token: string]: number; };
/** Maps a numeric token id back to its token string. */
private readonly tokenIdToToken: string[];
prevToken: TMTokenDecodeData;

/**
 * @param scopeRegistry Registry queried for the language of each decoded scope.
 */
constructor(scopeRegistry: TMScopeRegistry) {
	this.lastAssignedTokenId = 0;
	this.scopeRegistry = scopeRegistry;
	// Prototype-less objects, so scope/token strings cannot collide with inherited keys.
	this.scopeToTokenIds = Object.create(null);
	this.tokenToTokenId = Object.create(null);
	// Slot 0 is a placeholder: ids are assigned by pre-increment and therefore start at 1.
	this.tokenIdToToken = [null];
	this.prevToken = new TMTokenDecodeData([], []);
}
239267
/**
 * Returns the numeric id of `token`, assigning the next free id the first
 * time the token is seen.
 *
 * Ids start at 1, so a falsy lookup result always means "not assigned yet".
 */
private _getTokenId(token: string): number {
	let tokenId = this.tokenToTokenId[token];
	if (!tokenId) {
		tokenId = (++this.lastAssignedTokenId);
		this.tokenToTokenId[token] = tokenId;
		this.tokenIdToToken[tokenId] = token;
	}
	return tokenId;
}

/**
 * Decodes a TextMate scope into its `TMScopeDecodeData`.
 *
 * The scope is split on '.', each piece is mapped to its token id, and the
 * language is looked up through the scope registry. Results are cached per
 * scope string, so repeated calls with the same scope return the same object.
 */
public decodeTMScope(scope: string): TMScopeDecodeData {
	let result = this.scopeToTokenIds[scope];
	if (result) {
		return result;
	}

	const scopePieces = scope.split('.');

	const tokenIds: number[] = [];
	for (let i = 0; i < scopePieces.length; i++) {
		tokenIds[i] = this._getTokenId(scopePieces[i]);
	}

	result = new TMScopeDecodeData(scope, this.scopeRegistry.scopeToLanguage(scope), tokenIds);
	this.scopeToTokenIds[scope] = result;
	return result;
}
262295
263296 public getToken ( tokenMap : boolean [ ] ) : string {
264297 let result = '' ;
265298 let isFirst = true ;
266- for ( let i = 1 ; i <= this . lastAssignedId ; i ++ ) {
299+ for ( let i = 1 ; i <= this . lastAssignedTokenId ; i ++ ) {
267300 if ( tokenMap [ i ] ) {
268301 if ( isFirst ) {
269302 isFirst = false ;
@@ -281,8 +314,8 @@ export class DecodeMap {
281314export class TMTokenDecodeData {
282315 _tmTokenDecodeDataBrand : void ;
283316
284- public scopes : string [ ] ;
285- public scopeTokensMaps : boolean [ ] [ ] ;
317+ public readonly scopes : string [ ] ;
318+ public readonly scopeTokensMaps : boolean [ ] [ ] ;
286319
287320 constructor ( scopes : string [ ] , scopeTokensMaps : boolean [ ] [ ] ) {
288321 this . scopes = scopes ;
@@ -304,10 +337,10 @@ class Tokenizer {
304337 private _modeId : string ;
305338 private _decodeMap : DecodeMap ;
306339
/**
 * @param scopeRegistry Registry handed to this tokenizer's `DecodeMap`.
 * @param modeId The mode id stored on this tokenizer.
 * @param grammar The grammar stored on this tokenizer.
 */
constructor(scopeRegistry: TMScopeRegistry, modeId: string, grammar: IGrammar) {
	this._modeId = modeId;
	this._grammar = grammar;
	this._decodeMap = new DecodeMap(scopeRegistry);
}
312345
313346 public tokenize ( line : string , state : TMState , offsetDelta : number = 0 , stopAtOffset ?: number ) : ILineTokens {
@@ -358,7 +391,7 @@ export function decodeTextMateToken(decodeMap: DecodeMap, scopes: string[]): str
358391 let scopeTokensMaps : boolean [ ] [ ] = [ ] ;
359392 let prevScopeTokensMaps : boolean [ ] = [ ] ;
360393 let sameAsPrev = true ;
361- for ( let level = 1 /* deliberately skip scope 0*/ ; level < scopes . length ; level ++ ) {
394+ for ( let level = 1 /* deliberately skip scope 0*/ , scopesLength = scopes . length ; level < scopesLength ; level ++ ) {
362395 let scope = scopes [ level ] ;
363396
364397 if ( sameAsPrev ) {
@@ -370,10 +403,11 @@ export function decodeTextMateToken(decodeMap: DecodeMap, scopes: string[]): str
370403 sameAsPrev = false ;
371404 }
372405
373- let tokens = decodeMap . getTokenIds ( scope ) ;
406+ let decodedScope = decodeMap . decodeTMScope ( scope ) ;
407+ let decodedScopeTokens = decodedScope . tokenIds ;
374408 prevScopeTokensMaps = prevScopeTokensMaps . slice ( 0 ) ;
375- for ( let i = 0 ; i < tokens . length ; i ++ ) {
376- prevScopeTokensMaps [ tokens [ i ] ] = true ;
409+ for ( let i = 0 , len = decodedScopeTokens . length ; i < len ; i ++ ) {
410+ prevScopeTokensMaps [ decodedScopeTokens [ i ] ] = true ;
377411 }
378412 scopeTokensMaps [ level ] = prevScopeTokensMaps ;
379413 }
0 commit comments