Skip to content

Commit 121b750

Browse files
committed
Update Python parser to produce the new GumTree XML format
1 parent ab87fd3 commit 121b750

File tree

1 file changed

+43
-76
lines changed

1 file changed

+43
-76
lines changed

pythonparser

Lines changed: 43 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ def read_file_to_string(filename):
2323

2424

2525
def parse_file(filename):
26-
global c, d
2726
tree = asttokens.ASTTokens(read_file_to_string(filename), parse=True).tree
2827

2928
json_tree = []
@@ -33,50 +32,44 @@ def parse_file(filename):
3332
json_tree.append(json_node)
3433
json_node['type'] = node_type
3534
json_node['value'] = identifier
36-
try:
37-
json_node['line_no'] = str(node.lineno)
38-
json_node['col'] = str(node.col_offset)
39-
except:
40-
try:
41-
json_node['line_no'] = str(node.first_token.start[0])
42-
json_node['col'] = str(node.first_token.start[1])
43-
except:
44-
pass
35+
36+
json_pos_and_length = extract_pos_and_length(node, node)
37+
json_node['pos'] = str(json_pos_and_length[0])
38+
json_node['length'] = str(json_pos_and_length[1])
39+
4540
return pos
4641

4742
def traverse_list(l, node_type='list', node=None):
    """Record a list-valued node in ``json_tree`` and return its index.

    The new entry gets ``type`` set to ``node_type``, string-valued ``pos``
    and ``length`` obtained from ``extract_pos_and_length``, and a
    ``children`` list holding the index returned by ``traverse`` for every
    item of ``l`` (omitted when there are no children).

    ``node`` is only consulted for the position when ``l`` is empty.
    """
    pos = len(json_tree)
    # Register the entry before descending so that its index is lower than
    # the indices of everything traversed beneath it.
    json_node = {'type': node_type}
    json_tree.append(json_node)

    # A non-empty list is measured from its first item to its last one;
    # an empty list falls back to the optional owning node.
    if len(l) > 0:
        first, last = l[0], l[-1]
    else:
        first, last = node, node
    start, length = extract_pos_and_length(first, last)
    json_node['pos'] = str(start)
    json_node['length'] = str(length)

    children = [traverse(item) for item in l]
    if children:
        json_node['children'] = children

    return pos
6662

6763
def traverse(node):
6864
pos = len(json_tree)
6965
json_node = {}
7066
json_tree.append(json_node)
7167
json_node['type'] = type(node).__name__
72-
try:
73-
json_node['line_no'] = str(node.lineno)
74-
json_node['col'] = str(node.col_offset)
75-
except:
76-
try:
77-
json_node['line_no'] = str(node.first_token.start[0])
78-
json_node['col'] = str(node.first_token.start[1])
79-
except: pass
68+
69+
json_pos_and_length = extract_pos_and_length(node, node)
70+
json_node['pos'] = str(json_pos_and_length[0])
71+
json_node['length'] = str(json_pos_and_length[1])
72+
8073
children = []
8174
if isinstance(node, ast.Name):
8275
json_node['value'] = node.id
@@ -162,64 +155,36 @@ def parse_file(filename):
162155

163156
if (len(children) != 0):
164157
json_node['children'] = children
158+
165159
return pos
166160

161+
def extract_pos_and_length(node, other_node):
    """Return ``[pos, length]`` for the span from ``node`` to ``other_node``.

    The start is read from ``node`` and the end from ``other_node``; pass
    the same object twice to measure a single node.  Two sources are tried
    in order:

    1. ``startpos`` / ``endpos`` attributes directly on the objects;
    2. ``first_token.startpos`` on ``node`` and ``last_token.endpos`` on
       ``other_node`` (presumably the token annotations added by
       asttokens -- confirm against the library documentation).

    Returns:
        A two-element list ``[start, end - start]``, or ``[-1, -1]`` when
        neither source is available (for example when ``node`` is ``None``
        or is a plain value without position attributes).
    """
    # Only a missing attribute (AttributeError) or an unusable value such
    # as None in the subtraction (TypeError) means "no position here".
    # Anything else, e.g. KeyboardInterrupt, must propagate.
    try:
        start = node.startpos
        return [start, other_node.endpos - start]
    except (AttributeError, TypeError):
        pass

    try:
        start = node.first_token.startpos
        return [start, other_node.last_token.endpos - start]
    except (AttributeError, TypeError):
        return [-1, -1]
170+
167171
traverse(tree)
168172
return json.dumps(json_tree, separators=(',', ':'), ensure_ascii=False)
169173

170174
def write(i, indent_level=0):
    """Append the XML for node ``i`` of ``tree`` and its subtree to ``lines``.

    Emits an opening ``<tree ...>`` line carrying the node's ``type``,
    an optional ``label`` (only when the node's ``value`` is truthy),
    ``pos`` and ``length``; then recurses into every entry of the node's
    ``children`` one indentation level deeper; then emits ``</tree>``.

    Reads the module-level ``tree`` and appends to the module-level
    ``lines``.
    """
    global lines
    # NOTE(review): the indent literal is reproduced as it appears in this
    # view; the rendering may have collapsed whitespace -- confirm against
    # the repository.
    indent_unit = ' '
    indent = indent_unit * indent_level
    node = tree[i]

    value = node['value']
    if value:
        label_attr = ' label=' + quoteEscape(value)
    else:
        label_attr = ''

    opening_tag = ''.join([
        indent,
        '<tree type="', node['type'], '"',
        label_attr,
        ' pos="', str(node['pos']),
        '" length="', str(node['length']),
        '">',
    ])
    lines.append(opening_tag)

    for child_index in node["children"]:
        write(int(child_index), indent_level + 1)

    lines.append(indent + '</tree>')
192184

193185
def quoteEscape(x):
    """Return ``x`` quoted for use as an XML attribute value.

    Thin wrapper that delegates to ``quoteattr`` (presumably
    ``xml.sax.saxutils.quoteattr``; the import is outside this view).
    """
    quoted = quoteattr(x)
    return quoted
195187

196-
def getFirstNonChildInd(tree, index):
197-
def getHelper(index):
198-
if len(tree[index]['children']) == 0:
199-
return index
200-
return max([getHelper(i) for i in tree[index]['children']])
201-
return getHelper(index) + 1
202-
def getLast(text, i):
203-
tree[i]["end_line_no"] = len(text.split('\n'))
204-
tree[i]["end_col"] = len(text.split('\n')[-1])
205-
def addEnd(tree, i, text):
206-
try:
207-
first_non_child = getFirstNonChildInd(tree, i)
208-
first_non_child_start = tree[first_non_child]["line_no"]
209-
first_non_child_col = tree[first_non_child]["col"]
210-
tree[i]["end_line_no"] = int(first_non_child_start)
211-
tree[i]["end_col"] = int(first_non_child_col)
212-
if first_non_child_col == "-1":
213-
tree[first_non_child]["col"] = "0"
214-
tree[i]["end_col"] = "0"
215-
except:
216-
getLast(text, i)
217-
for child in tree[i]['children']:
218-
addEnd(tree, child, text)
219-
220-
221-
222-
223188
if __name__ == "__main__":
224189
try:
225190
text = open(sys.argv[1], "r+").read()
@@ -228,10 +193,12 @@ if __name__ == "__main__":
228193
tree = jsontree.JSONTreeDecoder().decode(json_file)
229194

230195
x = tree[0]
231-
196+
x['length'] = len(text)
232197
lines = []
233-
addEnd(tree, 0, text)
198+
lines.append("<root>")
199+
lines.append("<context></context>")
234200
write(0)
201+
lines.append("</root>")
235202

236203
print('\n'.join(lines))
237204

0 commit comments

Comments
 (0)