@@ -446,24 +446,44 @@ class Tokenizer {
446446 private _grammar : IGrammar ;
447447 private _modeId : string ;
448448 private _decodeMap : DecodeMap ;
449+ private _stackOverflowReported : boolean ;
449450
/**
 * Creates a tokenizer bound to one mode and one grammar.
 *
 * @param languageRegistration Passed to the DecodeMap constructor.
 * @param modeId Stored as-is; tokenize() uses it for the ModeTransition of skipped lines.
 * @param grammar Stored as-is; tokenize() calls its tokenizeLine().
 */
450451 constructor ( languageRegistration : TMLanguageRegistration , modeId : string , grammar : IGrammar ) {
451452 this . _modeId = modeId ;
452453 this . _grammar = grammar ;
453454 this . _decodeMap = new DecodeMap ( languageRegistration ) ;
// Starts cleared: tokenize() sets it after logging a too-deep rule stack and
// clears it again once a line gets past the depth check, so the message is
// logged once per run of consecutive overflowing lines.
455+ this . _stackOverflowReported = false ;
454456 }
455457
456458 public tokenize ( line : string , state : TMState , offsetDelta : number = 0 , stopAtOffset ?: number ) : ILineTokens {
459+
457460 // Do not attempt to tokenize if a line has 20k characters or more
461+ if ( line . length >= 20000 ) {
462+ console . log ( `Line (${ line . substr ( 0 , 15 ) } ...): longer than 20k characters, tokenization skipped.` ) ;
463+ return new RawLineTokens (
464+ [ new Token ( offsetDelta , '' ) ] ,
465+ [ new ModeTransition ( offsetDelta , this . _modeId ) ] ,
466+ offsetDelta ,
467+ state
468+ ) ;
469+ }
470+
458471 // or if the rule stack contains more than 100 rules (indicator of broken grammar that forgets to pop rules)
459- if ( line . length >= 20000 || depth ( state . ruleStack ) > 100 ) {
472+ if ( depth ( state . ruleStack ) > 100 ) {
473+ if ( ! this . _stackOverflowReported ) {
474+ // don't report again unless a good state has been reached again.
475+ console . log ( `Line (${ line . substr ( 0 , 15 ) } ...): stack deeper than 100: tokenization stopped.` , printRuleStack ( state . ruleStack ) ) ;
476+ this . _stackOverflowReported = true ;
477+ }
460478 return new RawLineTokens (
461479 [ new Token ( offsetDelta , '' ) ] ,
462480 [ new ModeTransition ( offsetDelta , this . _modeId ) ] ,
463481 offsetDelta ,
464482 state
465483 ) ;
466484 }
485+ this . _stackOverflowReported = false ;
486+
467487 let textMateResult = this . _grammar . tokenizeLine ( line , state . ruleStack ) ;
468488
469489 let endState : TMState ;
@@ -478,6 +498,16 @@ class Tokenizer {
478498 }
479499}
480500
501+ // TODO: replace with something like ruleStack.toDebugString
/**
 * Lists the `_scopeName` of every element reachable from `ruleStack` by
 * following `_parent` links, starting with `ruleStack` itself and stopping
 * at the first falsy parent.
 *
 * @param ruleStack Element to start from; a falsy value yields an empty array.
 * @returns The collected scope names, in the order they were visited.
 */
function printRuleStack(ruleStack: StackElement): string[] {
	const names: string[] = [];
	for (let element = ruleStack; element; element = element._parent) {
		// `_scopeName` is read by string index, exactly as the original does.
		names.push(element['_scopeName']);
	}
	return names;
}
510+
481511export function decodeTextMateTokens ( topLevelModeId : string , decodeMap : DecodeMap , line : string , offsetDelta : number , resultTokens : IToken [ ] , resultState : TMState ) : RawLineTokens {
482512
483513 // Create the result early and fill in the tokens later
0 commit comments