-import os
-import sys
 import collections
-import importlib.machinery
+import tokenize  # from stdlib

-# Use Lib/token.py and Lib/tokenize.py to obtain the tokens. To maintain this
-# compatible with older versions of Python, we need to make sure that we only
-# import these two files (and not any of the dependencies of these files).
-
-CURRENT_FOLDER_LOCATION = os.path.dirname(os.path.realpath(__file__))
-LIB_LOCATION = os.path.realpath(os.path.join(CURRENT_FOLDER_LOCATION, '..', '..', 'Lib'))
-TOKEN_LOCATION = os.path.join(LIB_LOCATION, 'token.py')
-TOKENIZE_LOCATION = os.path.join(LIB_LOCATION, 'tokenize.py')
-
-token = importlib.machinery.SourceFileLoader('token',
-                                             TOKEN_LOCATION).load_module()
-# Add token to the module cache so tokenize.py uses that exact one instead of
-# the one in the stdlib of the interpreter executing this file.
-sys.modules['token'] = token
-tokenize = importlib.machinery.SourceFileLoader('tokenize',
-                                                TOKENIZE_LOCATION).load_module()
-
-from . import grammar
+from . import grammar, token

 class ParserGenerator(object):

-    def __init__(self, filename, stream=None, verbose=False):
+    def __init__(self, grammar_file, token_file, stream=None, verbose=False):
         close_stream = None
         if stream is None:
-            stream = open(filename)
+            stream = open(grammar_file)
             close_stream = stream.close
-        self.tokens = token
-        self.opmap = token.EXACT_TOKEN_TYPES
+        with open(token_file) as tok_file:
+            token_lines = tok_file.readlines()
+        self.tokens = dict(token.generate_tokens(token_lines))
+        self.opmap = dict(token.generate_opmap(token_lines))
         # Manually add <> so it does not collide with !=
-        self.opmap['<>'] = self.tokens.NOTEQUAL
+        self.opmap['<>'] = "NOTEQUAL"
         self.verbose = verbose
-        self.filename = filename
+        self.filename = grammar_file
         self.stream = stream
         self.generator = tokenize.generate_tokens(stream.readline)
         self.gettoken()  # Initialize lookahead
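
# Editor's note: the new __init__ builds its token tables from the sibling
# `token` module imported above via `from . import grammar, token`. The bodies
# of generate_tokens() and generate_opmap() are not part of this diff; the
# sketch below is an assumption inferred from how their results are used
# (token name -> numeric id, operator string -> token name), for
# Grammar/Tokens-style input lines such as "NOTEQUAL '!='".
import itertools

def generate_tokens(lines):
    # Pair each token name with a sequential numeric id, skipping blank
    # lines and comments.
    numbers = itertools.count()
    for line in lines:
        line = line.strip()
        if line and not line.startswith('#'):
            yield line.split()[0], next(numbers)

def generate_opmap(lines):
    # Pair each operator string with its token name, e.g. '!=' -> 'NOTEQUAL'.
    for line in lines:
        parts = line.strip().split()
        if len(parts) == 2 and not parts[0].startswith('#'):
            name, op_string = parts
            yield op_string.strip("'"), name
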
@@ -108,9 +91,9 @@ def make_label(self, c, label):
                     return ilabel
             else:
                 # A named token (NAME, NUMBER, STRING)
-                itoken = getattr(self.tokens, label, None)
+                itoken = self.tokens.get(label, None)
                 assert isinstance(itoken, int), label
-                assert itoken in self.tokens.tok_name, label
+                assert itoken in self.tokens.values(), label
                 if itoken in c.tokens:
                     return c.tokens[itoken]
                 else:
@@ -126,12 +109,13 @@ def make_label(self, c, label):
             if value in c.keywords:
                 return c.keywords[value]
             else:
-                c.labels.append((self.tokens.NAME, value))
+                c.labels.append((self.tokens["NAME"], value))
                 c.keywords[value] = ilabel
                 return ilabel
         else:
             # An operator (any non-numeric token)
-            itoken = self.opmap[value]  # Fails if unknown token
+            tok_name = self.opmap[value]  # Fails if unknown token
+            itoken = self.tokens[tok_name]
             if itoken in c.tokens:
                 return c.tokens[itoken]
             else:
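
# Editor's note: after this change self.opmap maps an operator *string* to a
# token *name*, and self.tokens maps that name to a numeric id, so resolving
# an operator takes two lookups. This is also why __init__ now stores the
# string "NOTEQUAL" for '<>' rather than a numeric constant. A tiny
# standalone illustration (the numeric ids here are made up):
tokens = {'NAME': 1, 'NOTEQUAL': 28}           # token name -> numeric id
opmap = {'!=': 'NOTEQUAL', '<>': 'NOTEQUAL'}   # operator -> token name
assert tokens[opmap['<>']] == tokens[opmap['!=']]  # both resolve to one id
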
@@ -184,16 +168,16 @@ def parse(self):
         dfas = collections.OrderedDict()
         startsymbol = None
         # MSTART: (NEWLINE | RULE)* ENDMARKER
-        while self.type != self.tokens.ENDMARKER:
-            while self.type == self.tokens.NEWLINE:
+        while self.type != tokenize.ENDMARKER:
+            while self.type == tokenize.NEWLINE:
                 self.gettoken()
             # RULE: NAME ':' RHS NEWLINE
-            name = self.expect(self.tokens.NAME)
+            name = self.expect(tokenize.NAME)
             if self.verbose:
                 print("Processing rule {dfa_name}".format(dfa_name=name))
-            self.expect(self.tokens.OP, ":")
+            self.expect(tokenize.OP, ":")
             a, z = self.parse_rhs()
-            self.expect(self.tokens.NEWLINE)
+            self.expect(tokenize.NEWLINE)
             if self.verbose:
                 self.dump_nfa(name, a, z)
             dfa = self.make_dfa(a, z)
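
# Editor's note: the loop above walks the meta-grammar spelled out in the
# inline comments (MSTART: (NEWLINE | RULE)* ENDMARKER; RULE: NAME ':' RHS
# NEWLINE). A fragment of the kind of input it consumes, in Grammar/Grammar
# notation (simplified example rules, not the real grammar):
GRAMMAR_FRAGMENT = """\
pass_stmt: 'pass'
small_stmt: pass_stmt | flow_stmt
"""
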
@@ -309,7 +293,7 @@ def parse_alt(self):
         # ALT: ITEM+
         a, b = self.parse_item()
         while (self.value in ("(", "[") or
-               self.type in (self.tokens.NAME, self.tokens.STRING)):
+               self.type in (tokenize.NAME, tokenize.STRING)):
             c, d = self.parse_item()
             b.addarc(c)
             b = d
@@ -320,7 +304,7 @@ def parse_item(self):
320304 if self .value == "[" :
321305 self .gettoken ()
322306 a , z = self .parse_rhs ()
323- self .expect (self . tokens .OP , "]" )
307+ self .expect (tokenize .OP , "]" )
324308 a .addarc (z )
325309 return a , z
326310 else :
@@ -340,9 +324,9 @@ def parse_atom(self):
340324 if self .value == "(" :
341325 self .gettoken ()
342326 a , z = self .parse_rhs ()
343- self .expect (self . tokens .OP , ")" )
327+ self .expect (tokenize .OP , ")" )
344328 return a , z
345- elif self .type in (self . tokens . NAME , self . tokens .STRING ):
329+ elif self .type in (tokenize . NAME , tokenize .STRING ):
346330 a = NFAState ()
347331 z = NFAState ()
348332 a .addarc (z , self .value )
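
# Editor's note: parse_atom() builds a two-state NFA fragment for each NAME or
# STRING atom. NFAState is defined elsewhere in this file and is not shown in
# the diff; the minimal sketch below assumes the shape implied by the calls
# above (states holding a list of labeled arcs).
class NFAState:
    def __init__(self):
        self.arcs = []  # list of (label or None, NFAState) pairs

    def addarc(self, next_state, label=None):
        self.arcs.append((label, next_state))

a, z = NFAState(), NFAState()
a.addarc(z, 'NAME')  # a --NAME--> z, the fragment returned for an atom
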
@@ -365,7 +349,7 @@ def gettoken(self):
         while tup[0] in (tokenize.COMMENT, tokenize.NL):
             tup = next(self.generator)
         self.type, self.value, self.begin, self.end, self.line = tup
-        # print self.tokens['tok_name'][self.type], repr(self.value)
+        # print(getattr(tokenize, 'tok_name')[self.type], repr(self.value))

     def raise_error(self, msg, *args):
         if args:
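
# Editor's note: a sketch of how the widened constructor is presumably driven
# (paths and the verbose flag are illustrative; the real caller is not shown
# in this diff):
p = ParserGenerator('Grammar/Grammar', 'Grammar/Tokens', verbose=True)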