Skip to content

Commit 1c29cc1

Browse files
committed
feat: new parso implementation.
1 parent 8a61553 commit 1c29cc1

File tree

1 file changed

+50
-203
lines changed

1 file changed

+50
-203
lines changed

pythonparser

Lines changed: 50 additions & 203 deletions
Original file line numberDiff line numberDiff line change
@@ -1,206 +1,53 @@
1-
#!/usr/bin/env python2.7
1+
#!/usr/bin/env python3
22

3+
from xml.dom import minidom
4+
import parso
35
import sys
4-
import json as json
5-
import ast
6-
import jsontree
7-
import asttokens
8-
from xml.sax.saxutils import quoteattr
96

10-
def PrintUsage():
11-
sys.stderr.write("""
12-
Usage:
13-
parse_python.py <file>
14-
15-
""")
16-
exit(1)
17-
18-
def read_file_to_string(filename):
19-
f = open(filename, 'rt')
20-
s = f.read()
21-
f.close()
22-
return s
23-
24-
25-
def parse_file(filename):
26-
tree = asttokens.ASTTokens(read_file_to_string(filename), parse=True).tree
27-
28-
json_tree = []
29-
def gen_identifier(identifier, node_type = 'identifier', node=None):
30-
pos = len(json_tree)
31-
json_node = {}
32-
json_tree.append(json_node)
33-
json_node['type'] = node_type
34-
json_node['value'] = identifier
35-
36-
json_pos_and_length = extract_pos_and_length(node, node)
37-
json_node['pos'] = str(json_pos_and_length[0])
38-
json_node['length'] = str(json_pos_and_length[1])
39-
40-
return pos
41-
42-
def traverse_list(l, node_type = 'list', node = None):
43-
pos = len(json_tree)
44-
json_node = {}
45-
json_tree.append(json_node)
46-
json_node['type'] = node_type
47-
48-
if (len(l) > 0):
49-
json_pos_and_length = extract_pos_and_length(l[0], l[-1])
50-
else:
51-
json_pos_and_length = extract_pos_and_length(node, node)
52-
json_node['pos'] = str(json_pos_and_length[0])
53-
json_node['length'] = str(json_pos_and_length[1])
54-
55-
children = []
56-
for item in l:
57-
children.append(traverse(item))
58-
if (len(children) != 0):
59-
json_node['children'] = children
60-
61-
return pos
62-
63-
def traverse(node):
64-
pos = len(json_tree)
65-
json_node = {}
66-
json_tree.append(json_node)
67-
json_node['type'] = type(node).__name__
68-
69-
json_pos_and_length = extract_pos_and_length(node, node)
70-
json_node['pos'] = str(json_pos_and_length[0])
71-
json_node['length'] = str(json_pos_and_length[1])
72-
73-
children = []
74-
if isinstance(node, ast.Name):
75-
json_node['value'] = node.id
76-
elif isinstance(node, ast.Num):
77-
json_node['value'] = unicode(node.n)
78-
elif isinstance(node, ast.Str):
79-
json_node['value'] = node.s.decode('utf-8')
80-
elif isinstance(node, ast.alias):
81-
json_node['value'] = unicode(node.name)
82-
if node.asname:
83-
children.append(gen_identifier(node.asname, node = node))
84-
elif isinstance(node, ast.FunctionDef):
85-
json_node['value'] = unicode(node.name)
86-
elif isinstance(node, ast.ClassDef):
87-
json_node['value'] = unicode(node.name)
88-
elif isinstance(node, ast.ImportFrom):
89-
if node.module:
90-
json_node['value'] = unicode(node.module)
91-
elif isinstance(node, ast.Global):
92-
for n in node.names:
93-
children.append(gen_identifier(n, node = node))
94-
elif isinstance(node, ast.keyword):
95-
json_node['value'] = unicode(node.arg)
96-
97-
98-
# Process children.
99-
if isinstance(node, ast.For):
100-
children.append(traverse(node.target))
101-
children.append(traverse(node.iter))
102-
children.append(traverse_list(node.body, 'body', node))
103-
if node.orelse:
104-
children.append(traverse_list(node.orelse, 'orelse', node))
105-
elif isinstance(node, ast.If) or isinstance(node, ast.While):
106-
children.append(traverse(node.test))
107-
children.append(traverse_list(node.body, 'body', node))
108-
if node.orelse:
109-
children.append(traverse_list(node.orelse, 'orelse', node))
110-
elif isinstance(node, ast.With):
111-
children.append(traverse(node.context_expr))
112-
if node.optional_vars:
113-
children.append(traverse(node.optional_vars))
114-
children.append(traverse_list(node.body, 'body', node))
115-
elif isinstance(node, ast.TryExcept):
116-
children.append(traverse_list(node.body, 'body', node))
117-
children.append(traverse_list(node.handlers, 'handlers', node))
118-
if node.orelse:
119-
children.append(traverse_list(node.orelse, 'orelse', node))
120-
elif isinstance(node, ast.TryFinally):
121-
children.append(traverse_list(node.body, 'body', node))
122-
children.append(traverse_list(node.finalbody, 'finalbody', node))
123-
elif isinstance(node, ast.arguments):
124-
children.append(traverse_list(node.args, 'args', node))
125-
children.append(traverse_list(node.defaults, 'defaults', node))
126-
if node.vararg:
127-
children.append(gen_identifier(node.vararg, 'vararg', node))
128-
if node.kwarg:
129-
children.append(gen_identifier(node.kwarg, 'kwarg', node))
130-
elif isinstance(node, ast.ExceptHandler):
131-
if node.type:
132-
children.append(traverse_list([node.type], 'type', node))
133-
if node.name:
134-
children.append(traverse_list([node.name], 'name', node))
135-
children.append(traverse_list(node.body, 'body', node))
136-
elif isinstance(node, ast.ClassDef):
137-
children.append(traverse_list(node.bases, 'bases', node))
138-
children.append(traverse_list(node.body, 'body', node))
139-
children.append(traverse_list(node.decorator_list, 'decorator_list', node))
140-
elif isinstance(node, ast.FunctionDef):
141-
children.append(traverse(node.args))
142-
children.append(traverse_list(node.body, 'body', node))
143-
children.append(traverse_list(node.decorator_list, 'decorator_list', node))
144-
else:
145-
# Default handling: iterate over children.
146-
for child in ast.iter_child_nodes(node):
147-
if isinstance(child, ast.expr_context) or isinstance(child, ast.operator) or isinstance(child, ast.boolop) or isinstance(child, ast.unaryop) or isinstance(child, ast.cmpop):
148-
# Directly include expr_context, and operators into the type instead of creating a child.
149-
json_node['type'] = json_node['type'] + type(child).__name__
150-
else:
151-
children.append(traverse(child))
152-
153-
if isinstance(node, ast.Attribute):
154-
children.append(gen_identifier(node.attr, 'attr', node))
155-
156-
if (len(children) != 0):
157-
json_node['children'] = children
158-
159-
return pos
160-
161-
def extract_pos_and_length(node, other_node):
162-
try:
163-
return [node.startpos, other_node.endpos - node.startpos]
164-
except:
165-
try:
166-
return [node.first_token.startpos, other_node.last_token.endpos - node.first_token.startpos]
167-
except:
168-
pass
169-
return [-1, -1]
170-
171-
traverse(tree)
172-
return json.dumps(json_tree, separators=(',', ':'), ensure_ascii=False)
173-
174-
def write(i, indent_level = 0):
175-
global lines
176-
indent_string = ' '
177-
indent = indent_string * indent_level
178-
node = tree[i]
179-
label_attr = ' label=' + quoteEscape(node["value"]) if node['value'] else ''
180-
lines.append(indent + '<tree type="' + node['type'] + '"' + label_attr + ' pos="' + str(node['pos']) + '" length="' + str(node['length']) + '">')
181-
for child in node["children"]:
182-
write(int(child), indent_level + 1)
183-
lines.append(indent + '</tree>')
184-
185-
def quoteEscape(x):
186-
return quoteattr(x);
187-
188-
if __name__ == "__main__":
189-
try:
190-
text = open(sys.argv[1], "r+").read()
191-
192-
json_file = parse_file(sys.argv[1])
193-
tree = jsontree.JSONTreeDecoder().decode(json_file)
194-
195-
x = tree[0]
196-
x['length'] = len(text)
197-
lines = []
198-
lines.append("<root>")
199-
lines.append("<context></context>")
200-
write(0)
201-
lines.append("</root>")
202-
203-
print('\n'.join(lines))
204-
205-
except (UnicodeEncodeError, UnicodeDecodeError):
206-
pass
7+
# Single XML document for the whole run: toGumtreeNode() creates every <tree>
# element from it and main() serialises it once the tree has been built.
doc = minidom.Document()
8+
# Line-start table: positions[k] is the absolute character offset at which
# line k + 1 begins. Seeded with line 1 at offset 0; readFile() fills in one
# entry per '\n', and toGumtreeNode() uses it to turn (line, column) pairs into
# absolute offsets.
positions = [0]
9+
10+
def main(file):
    """Parse the Python source at ``file`` and print its tree as XML.

    The file is read with ``readFile`` (which also fills the line-start
    table), parsed with parso, converted node by node into ``<tree>``
    elements attached to the module-level ``doc``, and the pretty-printed
    document is written to standard output.
    """
    source_text = readFile(file)
    parso_root = parso.parse(source_text)

    # The root element must be attached to the document before its
    # descendants are hung underneath it.
    root_element = toGumtreeNode(parso_root)
    doc.appendChild(root_element)
    processNode(parso_root, root_element)

    print(doc.toprettyxml())
17+
18+
def processNode(parsoNode, gumtreeNode):
    """Mirror the children of ``parsoNode`` underneath ``gumtreeNode``.

    Every child is converted with ``toGumtreeNode``. Children for which the
    conversion yields ``None`` are left out of the output; the others are
    appended to ``gumtreeNode`` and, when they expose a ``children``
    attribute, processed recursively.
    """
    for child in parsoNode.children:
        element = toGumtreeNode(child)
        if element is None:
            # Filtered-out token: nothing to attach and nothing to descend into.
            continue
        gumtreeNode.appendChild(element)
        if hasattr(child, 'children'):
            processNode(child, element)
25+
26+
def toGumtreeNode(parsoNode):
    """Build the ``<tree>`` element for a single parso node.

    Returns ``None`` for nodes that are left out of the output: every
    ``keyword``, ``newline`` and ``endmarker`` node, and ``operator`` nodes
    whose value is one of ``. ( ) [ ] : ;``.

    Otherwise returns an element created from the module-level ``doc`` with
    the attributes ``type`` (the parso node type), ``pos`` (absolute start
    offset), ``length`` (end offset minus start offset) and, for nodes that
    have no children, ``label`` (the node's ``value``).
    """
    node_type = parsoNode.type
    if node_type in ('keyword', 'newline', 'endmarker'):
        return None
    if node_type == 'operator' and parsoNode.value in ('.', '(', ')', '[', ']', ':', ';'):
        return None

    def absolute_offset(line_and_column):
        # The line number is used 1-based: entry ``line - 1`` of the
        # line-start table is where that line begins.
        return positions[line_and_column[0] - 1] + line_and_column[1]

    element = doc.createElement('tree')
    element.setAttribute("type", node_type)

    start = absolute_offset(parsoNode.start_pos)
    end = absolute_offset(parsoNode.end_pos)
    element.setAttribute("pos", str(start))
    element.setAttribute("length", str(end - start))

    # Only nodes without children (attribute missing or empty) carry a label.
    if not getattr(parsoNode, 'children', None):
        element.setAttribute("label", parsoNode.value)
    return element
41+
42+
def readFile(file):
    """Read a source file and rebuild the global line-start table.

    Args:
        file: Path of the file to read.

    Returns:
        The complete file contents as a string.

    Side effects:
        Refills the module-level ``positions`` list in place so that
        ``positions[k]`` is the offset in the returned text at which line
        ``k + 1`` starts. The list is reset first, so calling this function
        more than once does not accumulate stale offsets from earlier files.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 (the default Python 3 source encoding)
    # instead of relying on the platform locale. The handle gets its own name
    # so it no longer shadows the ``file`` parameter.
    with open(file, 'r', encoding='utf-8') as handle:
        data = handle.read()

    # NOTE: text mode has already translated line endings to '\n', so the
    # offsets below index into the returned text, not the raw bytes on disk.
    # Slice assignment keeps the same list object that other functions read.
    positions[:] = [0]
    positions.extend(
        offset + 1 for offset, char in enumerate(data) if char == '\n'
    )
    return data
51+
52+
if __name__ == '__main__':
    # Script entry point: expects the path of the Python file to parse as the
    # first command-line argument. Without it, exit with a usage message on
    # stderr (exit status 1) rather than an IndexError traceback. Extra
    # arguments are still ignored, as before.
    if len(sys.argv) < 2:
        sys.exit('usage: ' + sys.argv[0] + ' <file>')
    main(sys.argv[1])

0 commit comments

Comments
 (0)