forked from buriy/python-readability
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclient.py
More file actions
69 lines (52 loc) · 1.63 KB
/
Copy pathclient.py
File metadata and controls
69 lines (52 loc) · 1.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import argparse
import sys
from readability_lxml import VERSION
from readability_lxml.readability import Document
def parse_args(argv=None):
    """Parse the command-line options of the readability client.

    Args:
        argv: Optional list of argument strings to parse. When left as
            ``None`` argparse falls back to ``sys.argv[1:]``, which is what
            the script entry point relies on. Passing an explicit list lets
            callers and tests drive the parser without touching ``sys.argv``.

    Returns:
        The ``argparse.Namespace`` holding:
            verbose: ``True`` when ``-v``/``--verbose`` was given.
            metadata: ``True`` when ``-m``/``--metadata`` was given.
            path: a one-element list (because of ``nargs=1``) containing
                the url or file path to process.

    Raises:
        SystemExit: raised by argparse for ``--version``, ``--help`` or
            invalid arguments.
    """
    desc = "fast python port of arc90's readability tool"
    parser = argparse.ArgumentParser(description=desc)

    parser.add_argument(
        '--version',
        action='version',
        version=VERSION,
    )
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        default=False,
        help='Increase logging verbosity to DEBUG.',
    )
    parser.add_argument(
        '-m', '--metadata',
        action='store_true',
        default=False,
        help='print all metadata as well as content for the content',
    )
    # nargs=1 keeps ``path`` a list; callers read ``args.path[0]``.
    parser.add_argument(
        'path',
        metavar='P',
        type=str,
        nargs=1,
        help="The url or file path to process in readable form.",
    )

    return parser.parse_args(argv)
def _as_url(target):
    """Return the URL to fetch for *target*, or ``None`` for a local path.

    A target with an explicit ``http://`` or ``https://`` scheme is returned
    unchanged. A bare ``www.`` host gets ``http://`` prepended, because
    urlopen does not treat a scheme-less string as a web address. Anything
    else (including file names that merely begin with "http" or "www") is
    considered a file path.
    """
    if target.startswith(('http://', 'https://')):
        return target
    if target.startswith('www.'):
        return 'http://' + target
    return None


def _emit(text, encoding):
    """Print *text*, replacing characters that *encoding* cannot represent."""
    data = text.encode(encoding, 'replace')
    if str is bytes:
        # Python 2: print writes the encoded byte string verbatim.
        print(data)
    else:
        # Python 3: print needs text, so decode the already-sanitised bytes
        # back; every remaining character is known to be encodable.
        print(data.decode(encoding))


def main():
    """Command-line entry point: print the readable form of a url or file.

    Reads the target given on the command line (fetched with urlopen when it
    is a web address, otherwise opened as a local text file), feeds its
    content to ``Document`` and prints ``doc.summary()``. With
    ``-m``/``--metadata`` it instead prints the title, short title,
    confidence and html of ``doc.summary_with_metadata()``, one per line.

    The input handle is always closed, even when processing fails.
    """
    from contextlib import closing

    args = parse_args()
    target = args.path[0]
    url = _as_url(target)

    if url is not None:
        # urlopen lives in different modules on Python 3 and Python 2.
        try:
            from urllib.request import urlopen
        except ImportError:
            from urllib import urlopen
        source = urlopen(url)
    else:
        source = open(target, 'rt')

    # sys.__stdout__ can be None or lack an encoding (e.g. when piped on
    # Python 2); fall back to utf-8 in that case.
    enc = getattr(sys.__stdout__, 'encoding', None) or 'utf-8'

    with closing(source):
        doc = Document(source.read(),
                       debug=args.verbose,
                       url=url)
        if args.metadata:
            m = doc.summary_with_metadata()
            print(m.title())
            print(m.short_title())
            print(m.confidence)
            _emit(m.html, enc)
        else:
            _emit(doc.summary(), enc)
# Run the command-line client only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()