Skip to content

Commit 08660f6

Browse files
committed
PEP8 linting, so so close
1 parent 35792e7 commit 08660f6

1 file changed

Lines changed: 22 additions & 13 deletions

File tree

src/readability_lxml/readability.py

Lines changed: 22 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -217,9 +217,10 @@ def get_article(self, candidates, best_candidate,
217217
if sibling is best_elem:
218218
append = True
219219
sibling_key = sibling # HashableElement(sibling)
220-
if sibling_key in candidates and \
221-
candidates[sibling_key]['content_score'] >= sibling_score_threshold:
222-
append = True
220+
if sibling_key in candidates:
221+
sib_threshhold = sibling_score_threshold
222+
if candidates[sibling_key]['content_score'] >= sib_threshhold:
223+
append = True
223224

224225
if sibling.tag == "p":
225226
link_density = self.get_link_density(sibling)
@@ -294,10 +295,11 @@ def score_paragraphs(self, ):
294295
candidates[parent_node] = self.score_node(parent_node)
295296
ordered.append(parent_node)
296297

297-
if grand_parent_node is not None and grand_parent_node not in candidates:
298-
candidates[grand_parent_node] = self.score_node(
299-
grand_parent_node)
300-
ordered.append(grand_parent_node)
298+
if grand_parent_node is not None:
299+
if grand_parent_node not in candidates:
300+
candidates[grand_parent_node] = self.score_node(
301+
grand_parent_node)
302+
ordered.append(grand_parent_node)
301303

302304
content_score = 1
303305
content_score += len(inner_text.split(','))
@@ -308,7 +310,8 @@ def score_paragraphs(self, ):
308310
#WTF? candidates[elem]['content_score'] += content_score
309311
candidates[parent_node]['content_score'] += content_score
310312
if grand_parent_node is not None:
311-
candidates[grand_parent_node]['content_score'] += content_score / 2.0
313+
add_to_score = content_score / 2.0
314+
candidates[grand_parent_node]['content_score'] += add_to_score
312315

313316
# Scale the final candidates score based on link density. Good content
314317
# should have a relatively small link density (5% or less) and be
@@ -370,9 +373,12 @@ def remove_unlikely_candidates(self):
370373
if len(s) < 2:
371374
continue
372375
#self.debug(s)
373-
if REGEXES['unlikelyCandidatesRe'].search(s) and (not REGEXES['okMaybeItsACandidateRe'].search(s)) and elem.tag not in ['html', 'body']:
374-
self.debug("Removing unlikely candidate - %s" % describe(elem))
375-
elem.drop_tree()
376+
if REGEXES['unlikelyCandidatesRe'].search(s):
377+
if not REGEXES['okMaybeItsACandidateRe'].search(s):
378+
if elem.tag not in ['html', 'body']:
379+
self.debug("Removing unlikely candidate - %s" %
380+
describe(elem))
381+
elem.drop_tree()
376382

377383
def transform_misused_divs_into_paragraphs(self):
378384
for elem in self.tags(self.html, 'div'):
@@ -421,7 +427,9 @@ def sanitize(self, node, candidates):
421427
MIN_LEN = self.options.get('min_text_length',
422428
self.TEXT_LENGTH_THRESHOLD)
423429
for header in self.tags(node, "h1", "h2", "h3", "h4", "h5", "h6"):
424-
if self.class_weight(header) < 0 or self.get_link_density(header) > 0.33:
430+
class_weight = self.class_weight(header)
431+
link_density = self.get_link_density(header)
432+
if class_weight < 0 or link_density > 0.33:
425433
header.drop_tree()
426434

427435
for elem in self.tags(node, "form", "iframe", "textarea"):
@@ -455,7 +463,8 @@ def sanitize(self, node, candidates):
455463
parent_node = el.getparent()
456464
if parent_node is not None:
457465
if parent_node in candidates:
458-
content_score = candidates[parent_node]['content_score']
466+
parent = candidates[parent_node]
467+
content_score = parent['content_score']
459468
else:
460469
content_score = 0
461470
#if parent_node is not None:

0 commit comments

Comments
 (0)