Skip to content

Commit 674e5f9

Browse files
committed
Work on adding client.py to pull out cmd line code
1 parent 1c1cbae commit 674e5f9

2 files changed

Lines changed: 54 additions & 56 deletions

File tree

src/readability_lxml/client.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import argparse
2+
import sys
3+
4+
from readability_lxml import VERSION
5+
from readability_lxml.readability import Document
6+
7+
8+
def parse_args(argv=None):
    """Parse the command line options for the readability client.

    :param argv: Optional list of argument strings to parse. When ``None``
        (the default) argparse reads ``sys.argv[1:]``, which matches the
        behaviour of calling this function with no arguments.
    :returns: The ``argparse.Namespace`` holding ``verbose``, ``url`` and
        ``path``. ``path`` is declared with ``nargs=1`` and is therefore a
        one-element list.
    """
    desc = "fast python port of arc90's readability tool"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('--version',
        action='version', version=VERSION)

    parser.add_argument('-v', '--verbose',
        action='store_true',
        default=False,
        help="Increase logging verbosity to DEBUG.")

    # NOTE(review): the help text reads like a boolean flag, but
    # action='store' means -u consumes the following argument as its value.
    parser.add_argument('-u', '--url',
        action='store',
        default=None,
        help="Indicate that this is a url path.")

    parser.add_argument('path', metavar='P', type=str, nargs=1,
        help="The url or file path to process in readable form.")

    return parser.parse_args(argv)
29+
30+
31+
def main():
    """Read a document from a file or URL and print its readable summary.

    The positional ``path`` argument names what to read. When the ``-u``
    option is set the path is opened with ``urllib.urlopen``; otherwise it
    is opened as a local text file. The summary produced by ``Document`` is
    encoded with the stdout encoding (falling back to utf-8) and printed.
    """
    args = parse_args()
    location = args.path[0]

    if args.url:
        import urllib
        target = urllib.urlopen(location)
        # Hand Document the location that was actually fetched. Previously
        # the value given to -u was passed instead, which could differ from
        # the path that was read.
        doc_url = location
    else:
        target = open(location, 'rt')
        doc_url = None

    enc = sys.__stdout__.encoding or 'utf-8'

    try:
        doc = Document(target.read(),
                       debug=args.verbose,
                       url=doc_url)
        # Unencodable characters are replaced rather than raising.
        print(doc.summary().encode(enc, 'replace'))
    finally:
        # Close the file or URL handle even if parsing or printing fails.
        target.close()


if __name__ == '__main__':
    main()

src/readability_lxml/readability.py

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -531,59 +531,3 @@ def sanitize(self, node, candidates):
531531
pass
532532

533533
return clean_attributes(tounicode(node))
534-
535-
536-
class HashableElement(object):
    """Wrap a tree node so it can be hashed and compared by its ancestry.

    Two wrappers are equal when the chain of ``(tag, attributes, text)``
    triples from the node up to the root is equal. Any attribute not defined
    on the wrapper itself is looked up on the wrapped node.
    """

    def __init__(self, node):
        """Store ``node``; the path is computed lazily on first access."""
        self.node = node
        self._path = None

    def _get_path(self):
        """Return the cached tuple of node ids from this node up to the root."""
        if self._path is None:
            reverse_path = []
            node = self.node
            # Walk upwards until getparent() returns None.
            while node is not None:
                node_id = (node.tag, tuple(node.attrib.items()), node.text)
                reverse_path.append(node_id)
                node = node.getparent()
            self._path = tuple(reverse_path)
        return self._path
    path = property(_get_path)

    def __hash__(self):
        return hash(self.path)

    def __eq__(self, other):
        # Defer to the other operand instead of raising AttributeError when
        # compared with something that is not a HashableElement.
        if not isinstance(other, HashableElement):
            return NotImplemented
        return self.path == other.path

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__, so define it explicitly.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __getattr__(self, tag):
        # Only reached when normal lookup fails. If ``node`` itself is the
        # missing attribute (instance not initialised yet), delegating would
        # recurse forever, so fail with a plain AttributeError instead.
        if tag == 'node':
            raise AttributeError(tag)
        return getattr(self.node, tag)
561-
562-
563-
def main():
    """Command line entry point: print the readable summary of a document.

    The input is the URL given with ``-u``/``--url`` or, failing that, the
    single positional file argument. If neither is supplied the usage help
    is printed and the process exits with status 1.
    """
    from optparse import OptionParser

    opt_parser = OptionParser(usage="%prog: [options] [file]")
    opt_parser.add_option('-v', '--verbose', action='store_true')
    opt_parser.add_option('-u', '--url', default=None, help="use URL instead of a local file")
    options, positional = opt_parser.parse_args()

    # Need exactly one positional file argument unless a URL was supplied.
    have_input = options.url or len(positional) == 1
    if not have_input:
        opt_parser.print_help()
        sys.exit(1)

    if options.url:
        import urllib
        source = urllib.urlopen(options.url)
    else:
        source = open(positional[0], 'rt')

    enc = sys.__stdout__.encoding or 'utf-8'
    try:
        html = source.read()
        document = Document(html, debug=options.verbose, url=options.url)
        print(document.summary().encode(enc, 'replace'))
    finally:
        # Always release the file or URL handle.
        source.close()


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)