1- #!/usr/bin/env python2.7
1+ #!/usr/bin/env python3
22
3+ from xml .dom import minidom
4+ import parso
35import sys
4- import json as json
5- import ast
6- import jsontree
7- import asttokens
8- from xml .sax .saxutils import quoteattr
96
def PrintUsage():
    """Write the command-line usage text to stderr and terminate.

    Raises:
        SystemExit: always, with exit status 1.
    """
    sys.stderr.write("""
Usage:
parse_python.py <file>

""")
    # ``sys.exit`` replaces the bare ``exit`` helper, which is injected by the
    # ``site`` module for interactive use and is missing under ``python -S``.
    sys.exit(1)
17-
def read_file_to_string(filename):
    """Return the full text content of ``filename``.

    Args:
        filename: path of the file to read (opened in text mode, ``'rt'``).

    Returns:
        The file content as a single string.
    """
    # The context manager closes the handle even when read() raises.
    with open(filename, 'rt') as f:
        return f.read()
23-
24-
def parse_file(filename):
    """Parse a Python source file and serialise its AST as a flat JSON array.

    The file is parsed through ``asttokens`` and walked depth-first. Every
    visited node becomes one dict appended to a flat list; a node refers to
    its children by their indices in that list. Each dict carries ``type``,
    ``pos`` and ``length`` (both stored as strings, ``-1`` when no position
    can be derived) and, where applicable, ``value`` and ``children``.

    NOTE: this block targets the Python 2.7 ``ast`` module (``unicode``,
    ``ast.TryExcept``, ``ast.TryFinally``, ``With.context_expr`` ...).

    Args:
        filename: path of the Python source file to parse.

    Returns:
        The JSON text produced by ``json.dumps`` for the flat node list.
    """
    tree = asttokens.ASTTokens(read_file_to_string(filename), parse=True).tree

    json_tree = []

    def extract_pos_and_length(node, other_node):
        """Return ``[start, length]`` spanning ``node`` .. ``other_node``."""
        # First try position attributes stored directly on the objects.
        try:
            return [node.startpos, other_node.endpos - node.startpos]
        except (AttributeError, TypeError):
            pass
        # Otherwise fall back to the first/last token attached to the nodes.
        try:
            start = node.first_token.startpos
            return [start, other_node.last_token.endpos - start]
        except (AttributeError, TypeError):
            # No usable position (e.g. node is None): keep the sentinel.
            return [-1, -1]

    def append_node(node_type):
        """Append a fresh node dict and return ``(index, dict)``.

        The dict is appended before any child is traversed so that a parent
        always receives a smaller index than its descendants.
        """
        pos = len(json_tree)
        json_node = {'type': node_type}
        json_tree.append(json_node)
        return pos, json_node

    def set_extent(json_node, first, last):
        """Store the stringified position and length of ``first`` .. ``last``."""
        start, length = extract_pos_and_length(first, last)
        json_node['pos'] = str(start)
        json_node['length'] = str(length)

    def gen_identifier(identifier, node_type='identifier', node=None):
        """Emit a leaf node holding ``identifier``; position comes from ``node``."""
        pos, json_node = append_node(node_type)
        json_node['value'] = identifier
        set_extent(json_node, node, node)
        return pos

    def traverse_list(l, node_type='list', node=None):
        """Emit a synthetic node wrapping the AST nodes in ``l``."""
        pos, json_node = append_node(node_type)

        if l:
            # The wrapper spans from its first to its last element.
            set_extent(json_node, l[0], l[-1])
        else:
            # Empty list: borrow the extent of the owning node.
            set_extent(json_node, node, node)

        children = [traverse(item) for item in l]
        if children:
            json_node['children'] = children

        return pos

    def traverse(node):
        """Emit ``node`` and, recursively, its children; return its index."""
        pos, json_node = append_node(type(node).__name__)
        set_extent(json_node, node, node)

        children = []
        if isinstance(node, ast.Name):
            json_node['value'] = node.id
        elif isinstance(node, ast.Num):
            json_node['value'] = unicode(node.n)
        elif isinstance(node, ast.Str):
            json_node['value'] = node.s.decode('utf-8')
        elif isinstance(node, ast.alias):
            json_node['value'] = unicode(node.name)
            if node.asname:
                children.append(gen_identifier(node.asname, node=node))
        elif isinstance(node, ast.FunctionDef):
            json_node['value'] = unicode(node.name)
        elif isinstance(node, ast.ClassDef):
            json_node['value'] = unicode(node.name)
        elif isinstance(node, ast.ImportFrom):
            if node.module:
                json_node['value'] = unicode(node.module)
        elif isinstance(node, ast.Global):
            for n in node.names:
                children.append(gen_identifier(n, node=node))
        elif isinstance(node, ast.keyword):
            json_node['value'] = unicode(node.arg)

        # Process children.
        if isinstance(node, ast.For):
            children.append(traverse(node.target))
            children.append(traverse(node.iter))
            children.append(traverse_list(node.body, 'body', node))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse', node))
        elif isinstance(node, (ast.If, ast.While)):
            children.append(traverse(node.test))
            children.append(traverse_list(node.body, 'body', node))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse', node))
        elif isinstance(node, ast.With):
            children.append(traverse(node.context_expr))
            if node.optional_vars:
                children.append(traverse(node.optional_vars))
            children.append(traverse_list(node.body, 'body', node))
        elif isinstance(node, ast.TryExcept):
            children.append(traverse_list(node.body, 'body', node))
            children.append(traverse_list(node.handlers, 'handlers', node))
            if node.orelse:
                children.append(traverse_list(node.orelse, 'orelse', node))
        elif isinstance(node, ast.TryFinally):
            children.append(traverse_list(node.body, 'body', node))
            children.append(traverse_list(node.finalbody, 'finalbody', node))
        elif isinstance(node, ast.arguments):
            children.append(traverse_list(node.args, 'args', node))
            children.append(traverse_list(node.defaults, 'defaults', node))
            if node.vararg:
                children.append(gen_identifier(node.vararg, 'vararg', node))
            if node.kwarg:
                children.append(gen_identifier(node.kwarg, 'kwarg', node))
        elif isinstance(node, ast.ExceptHandler):
            if node.type:
                children.append(traverse_list([node.type], 'type', node))
            if node.name:
                children.append(traverse_list([node.name], 'name', node))
            children.append(traverse_list(node.body, 'body', node))
        elif isinstance(node, ast.ClassDef):
            children.append(traverse_list(node.bases, 'bases', node))
            children.append(traverse_list(node.body, 'body', node))
            children.append(traverse_list(node.decorator_list, 'decorator_list', node))
        elif isinstance(node, ast.FunctionDef):
            children.append(traverse(node.args))
            children.append(traverse_list(node.body, 'body', node))
            children.append(traverse_list(node.decorator_list, 'decorator_list', node))
        else:
            # Default handling: iterate over children.
            folded_kinds = (ast.expr_context, ast.operator, ast.boolop,
                            ast.unaryop, ast.cmpop)
            for child in ast.iter_child_nodes(node):
                if isinstance(child, folded_kinds):
                    # Directly include expr_context, and operators into the
                    # type instead of creating a child.
                    json_node['type'] = json_node['type'] + type(child).__name__
                else:
                    children.append(traverse(child))

        if isinstance(node, ast.Attribute):
            children.append(gen_identifier(node.attr, 'attr', node))

        if children:
            json_node['children'] = children

        return pos

    traverse(tree)
    return json.dumps(json_tree, separators=(',', ':'), ensure_ascii=False)
173-
def write(i, indent_level=0):
    """Append the XML rendering of node ``i`` and its subtree to ``lines``.

    Reads the node from the module-level ``tree`` and appends an opening
    ``<tree ...>`` line, the recursively rendered children (one indent level
    deeper) and a closing ``</tree>`` line to the module-level ``lines``.

    Args:
        i: index of the node inside ``tree``.
        indent_level: nesting depth, used to build the line prefix.
    """
    global lines
    node = tree[i]
    prefix = ' ' * indent_level

    # The label attribute is emitted only for nodes with a truthy value.
    if node['value']:
        label_attr = ' label=' + quoteEscape(node["value"])
    else:
        label_attr = ''

    opening = prefix + '<tree type="' + node['type'] + '"' + label_attr
    opening = opening + ' pos="' + str(node['pos']) + '"'
    opening = opening + ' length="' + str(node['length']) + '">'
    lines.append(opening)

    deeper = indent_level + 1
    for child in node["children"]:
        write(int(child), deeper)

    lines.append(prefix + '</tree>')
184-
def quoteEscape(x):
    """Return ``x`` escaped and quoted by ``quoteattr`` for use as an XML attribute value."""
    quoted = quoteattr(x)
    return quoted
187-
# Script entry point: parse the file named by the first command-line argument
# and print its tree as XML on stdout.
if __name__ == "__main__":
    try:
        # Read-only access is sufficient; the previous "r+" mode required
        # write permission, and the handle was never closed.
        with open(sys.argv[1], "r") as source_file:
            text = source_file.read()

        json_file = parse_file(sys.argv[1])
        tree = jsontree.JSONTreeDecoder().decode(json_file)

        # The root node is made to span the whole file.
        x = tree[0]
        x['length'] = len(text)
        lines = []
        lines.append("<root>")
        lines.append("<context></context>")
        write(0)
        lines.append("</root>")

        print('\n '.join(lines))

    except (UnicodeEncodeError, UnicodeDecodeError) as err:
        # Still best-effort (no traceback, no output on stdout), but the
        # reason is now reported instead of being silently discarded.
        sys.stderr.write("parse_python.py: encoding error: %s\n" % (err,))
# Module-level XML document shared by the functions below: toGumtreeNode
# creates its <tree> elements from it, and main attaches the root element to
# it and prints it.
7+ doc = minidom .Document ()
# Offset table consulted by toGumtreeNode as positions[line - 1] + column.
# It starts with a single 0 entry; readFile appends further entries while it
# scans the file content.
8+ positions = [0 ]
9+
def main(file):
    """Convert the Python file at ``file`` to an XML tree and print it.

    The source is read with ``readFile``, parsed with ``parso.parse``, and
    mirrored as ``<tree>`` elements under the module-level ``doc``; the
    pretty-printed document is written to stdout.

    Args:
        file: path of the Python source file to convert.
    """
    source = readFile(file)
    module = parso.parse(source)

    # Attach the root element before descending so children have a parent.
    root = toGumtreeNode(module)
    doc.appendChild(root)
    processNode(module, root)

    print(doc.toprettyxml())
17+
def processNode(parsoNode, gumtreeNode):
    """Recursively mirror the children of ``parsoNode`` under ``gumtreeNode``.

    Each child is converted with ``toGumtreeNode``; children for which that
    call returns ``None`` are skipped. Converted children are appended to
    ``gumtreeNode`` and, when they expose a ``children`` attribute, processed
    recursively.

    Args:
        parsoNode: parso node whose ``children`` are iterated.
        gumtreeNode: XML element that receives the converted children.
    """
    for parsoChild in parsoNode.children:
        gumtreeChild = toGumtreeNode(parsoChild)
        # Identity test against None instead of the ``!=`` comparison.
        if gumtreeChild is None:
            continue
        gumtreeNode.appendChild(gumtreeChild)
        if hasattr(parsoChild, 'children'):
            processNode(parsoChild, gumtreeChild)
25+
def toGumtreeNode(parsoNode):
    """Build the ``<tree>`` element for ``parsoNode``, or return ``None``.

    ``None`` is returned for nodes of type ``keyword``, ``newline`` and
    ``endmarker``, and for ``operator`` nodes whose value is one of
    ``. ( ) [ ] : ;``. Any other node yields an element with ``type``,
    ``pos`` and ``length`` attributes, plus a ``label`` holding the node's
    value when the node has no children.

    Args:
        parsoNode: parso node to convert.

    Returns:
        A new element created from the module-level ``doc``, or ``None``.
    """
    node_type = parsoNode.type
    if node_type in ('keyword', 'newline', 'endmarker'):
        return None
    if node_type == 'operator':
        if parsoNode.value in ('.', '(', ')', '[', ']', ':', ';'):
            return None

    element = doc.createElement('tree')
    element.setAttribute("type", node_type)

    # positions[line - 1] is the table entry for a 1-based line number; the
    # column is added on top of it.
    start_offset = positions[parsoNode.start_pos[0] - 1] + parsoNode.start_pos[1]
    end_offset = positions[parsoNode.end_pos[0] - 1] + parsoNode.end_pos[1]
    element.setAttribute("pos", str(start_offset))
    element.setAttribute("length", str(end_offset - start_offset))

    has_children = hasattr(parsoNode, 'children') and len(parsoNode.children) != 0
    if not has_children:
        element.setAttribute("label", parsoNode.value)
    return element
41+
def readFile(file):
    """Read ``file`` and rebuild the module-level line-offset table.

    After the call, ``positions[k]`` is the character offset at which line
    ``k + 1`` of the returned text starts (``positions[0]`` is 0).

    Args:
        file: path of the file to read (text mode).

    Returns:
        The file content as a string.
    """
    # A separate handle name keeps the ``file`` parameter intact.
    with open(file, 'r') as handle:
        data = handle.read()

    # Rebuild the table in place so that a second call does not inherit the
    # offsets of a previously read file.
    positions[:] = [0]
    # Compare against the newline character alone: a one-character item can
    # never equal a longer string, which left the table without any entry.
    positions.extend(
        offset + 1 for offset, char in enumerate(data) if char == '\n'
    )
    return data
51+
# Script entry point: convert the file named by the first argument.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # Exit with a readable message (status 1) instead of an IndexError
        # traceback when the path argument is missing.
        sys.exit('usage: ' + sys.argv[0] + ' <file>')
    main(sys.argv[1])
0 commit comments