Skip to content

Commit 68b4afc

Browse files
author
p12
committed
Transform/DDG: preserve short parenthesized text within description
1 parent 1f8ccf6 commit 68b4afc

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

ddg_parse_html.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ def get_declaration(root_el, name):
130 130
'''
131 131
def process_description(el):
132 132
char_limit = 200
133 +
min_paren_size = 40
133 134

134 135
el = deepcopy(el) # we'll modify the tree
135 136
el.tag = 'root'
@@ -174,7 +175,10 @@ def process_description(el):
174 175
if open_count == 0 and open_paren_count > 0:
175 176
open_paren_count -= 1
176 177
if open_paren_count == 0:
177 -
del_ranges.append((last_paren_open, t.start()+1))
178 +
end = t.start()+1
179 +
text = desc[last_paren_open:end]
180 +
if text.find('ᚃ') != -1 or len(text) > min_paren_size:
181 +
del_ranges.append((last_paren_open, t.start()+1))
178 182

179 183
else:
180 184
if mt[1] != '/':

0 commit comments

Comments (0)