Skip to content

Commit 7aac0f0

Browse files
committed
Return a div fragment instead of a whole HTML page
1 parent ac51783 commit 7aac0f0

1 file changed

Lines changed: 6 additions & 6 deletions

File tree

readability/readability.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -140,8 +140,8 @@ def get_article(self, candidates, best_candidate):
140140
# Things like preambles, content split by ads that we removed, etc.
141141

142142
sibling_score_threshold = max([10, best_candidate['content_score'] * 0.2])
143-
body = B.BODY()
144-
html = B.HTML(body)
143+
article = B.DIV()
144+
article.attrib['id'] = 'article'
145145
best_elem = best_candidate['elem']
146146
for sibling in best_elem.getparent().getchildren():
147147
#if isinstance(sibling, NavigableString): continue#in lxml there no concept of simple text
@@ -163,11 +163,11 @@ def get_article(self, candidates, best_candidate):
163163
append = True
164164

165165
if append:
166-
body.append(sibling)
166+
article.append(sibling)
167167

168-
#if body is not None:
169-
# body.append(best_elem)
170-
return html
168+
#if article is not None:
169+
# article.append(best_elem)
170+
return article
171171

172172
def select_best_candidate(self, candidates):
173173
sorted_candidates = sorted(candidates.values(), key=lambda x: x['content_score'], reverse=True)

0 commit comments

Comments (0)