forked from buriy/python-readability
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgen_test.py
More file actions
174 lines (150 loc) · 5 KB
/
Copy pathgen_test.py
File metadata and controls
174 lines (150 loc) · 5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
"""
This program facilitates the creation of a regression test case as used by the
test module. It uses the current readability algorithm to capture a benchmark
and construct a new test case.
"""
import argparse
import errno
import os
import os.path
import urllib2
import yaml
from readability_lxml import readability
from readability_lxml import urlfetch
from regression import (
TEST_DATA_PATH,
ORIGINAL_SUFFIX,
READABLE_SUFFIX,
YAML_EXTENSION,
adjust_url_map,
read_yaml
)
# Prompt shown by write_file() when the target file already exists; the %s
# placeholder is filled in with the path of that file.
OVERWRITE_QUESTION = '%s exists; overwrite and continue (y/n)? '
def y_or_n(question):
    """
    Ask a yes/no question on the console until a non-blank answer is given.

    The prompt is repeated for as long as the stripped reply is empty.
    Returns True when the first character of the reply is 'y' or 'Y' and
    False for any other non-empty reply.
    """
    answer = raw_input(question).strip()
    while not answer:
        # Blank (or whitespace-only) reply: keep asking.
        answer = raw_input(question).strip()
    return answer.startswith(('y', 'Y'))
def write_file(test_name, suffix, data):
    """
    Write data to the file TEST_DATA_PATH/<test_name><suffix>.

    The file is first opened for exclusive creation. If it already exists,
    the user is asked whether to overwrite it; on a yes it is truncated and
    rewritten, on a no nothing is written.

    Returns True if the data was written and False if the user declined to
    overwrite an existing file. Any OSError other than EEXIST raised while
    opening the file is propagated to the caller.
    """
    path = os.path.join(TEST_DATA_PATH, test_name + suffix)
    # 0o644 is the octal spelling accepted by both Python 2.6+ and Python 3.
    mode = 0o644
    try:
        # O_EXCL makes the open fail with EEXIST rather than silently
        # clobbering an existing file.
        fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_EXCL, mode)
    except OSError as e:
        if e.errno != errno.EEXIST:
            # Bare raise keeps the original traceback intact.
            raise
        if not y_or_n(OVERWRITE_QUESTION % path):
            return False
        fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, mode)
    # The context manager flushes and closes the file (and with it the
    # underlying descriptor) even if the write fails.
    with os.fdopen(fd, 'w') as f:
        f.write(data)
    return True
def write_original(test_name, orig):
    """
    Store orig as the original-HTML file of the test named test_name.

    Delegates to write_file() using ORIGINAL_SUFFIX and returns its result.
    """
    written = write_file(test_name, ORIGINAL_SUFFIX, orig)
    return written
def write_readable(test_name, orig, options=None):
    """
    Run the readability algorithm on orig and store the result as the
    readable benchmark of the test named test_name.

    options is a dict of keyword arguments forwarded to
    readability.Document; when omitted (or None) no extra keyword arguments
    are passed. Making it optional keeps two-argument callers such as
    create() working.

    Returns the result of write_file() for the READABLE_SUFFIX file.
    """
    if options is None:
        options = {}
    rdbl_doc = readability.Document(orig, **options)
    summary = rdbl_doc.summary()
    return write_file(test_name, READABLE_SUFFIX, summary.html)
def read_spec(test_name):
    """
    Load the YAML spec of the test named test_name.

    The spec is read with read_yaml() from the file
    TEST_DATA_PATH/<test_name><YAML_EXTENSION>.
    """
    spec_filename = test_name + YAML_EXTENSION
    return read_yaml(os.path.join(TEST_DATA_PATH, spec_filename))
def read_orig(test_name, url=None):
    """
    Reads the original HTML for a given test. If a url is provided, the HTML
    is fetched from it and written to the test's local copy. Otherwise, the
    existing local copy is read.

    Returns a pair: (HTML string, True iff the HTML has been or is already
    stored in a local copy). The second element is False when a freshly
    fetched page could not be stored because the user declined to overwrite
    the existing local copy.
    """
    if url:
        response = urllib2.urlopen(url)
        try:
            orig = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()
        return orig, write_original(test_name, orig)

    orig_path = os.path.join(TEST_DATA_PATH, test_name + ORIGINAL_SUFFIX)
    # Context manager so the file handle is closed deterministically.
    with open(orig_path) as f:
        orig = f.read()
    return orig, True
def create(args):
    """
    Create an entirely new test from the parsed 'create' arguments.

    Three files are written for args.test_name, in this order: the YAML spec
    (holding args.url and args.test_description), the original HTML fetched
    from args.url, and the readable benchmark produced from that HTML.

    Returns True if all three files were written, or False as soon as one
    write is declined by the user (files written before that point are left
    in place).
    """
    # TODO: Make this work for multi-page articles.
    spec_dict = {'url': args.url, 'test_description': args.test_description}
    spec = yaml.dump(spec_dict, default_flow_style=False)
    if not write_file(args.test_name, YAML_EXTENSION, spec):
        return False

    # The url comes from the parsed arguments; there is no bare `url` name
    # in this scope.
    response = urllib2.urlopen(args.url)
    try:
        orig = response.read()
    finally:
        response.close()
    if not write_original(args.test_name, orig):
        return False

    # write_readable() requires the Document options. genbench() passes the
    # spec's url plus a mock fetcher built from the spec's url_map; a newly
    # created spec has no url_map, so only the url is supplied here.
    # NOTE(review): confirm whether a mock fetcher should be passed as well.
    options = {'url': args.url}
    if not write_readable(args.test_name, orig, options):
        return False
    return True
def genbench(args):
    """
    Regenerate the readable benchmark of an existing test.

    The test's spec is read to obtain its url and optional url_map. When
    args.refetch is set the original HTML is fetched again from the spec's
    url; otherwise the stored local copy is used. The readable output is
    then produced with a mock fetcher built from the url_map and written to
    the test's READABLE_SUFFIX file.

    Returns True if the benchmark was written, False otherwise.
    """
    spec = read_spec(args.test_name)
    # A url is handed to read_orig() only when a refetch was requested.
    fetch_url = spec['url'] if args.refetch else None
    mock_fetcher = urlfetch.MockUrlFetch(
        adjust_url_map(spec.get('url_map', dict()))
    )
    doc_options = {'url': spec['url'], 'urlfetch': mock_fetcher}
    orig, stored = read_orig(args.test_name, fetch_url)
    if not stored:
        return False
    # write_readable() builds the Document, summarizes it and writes the
    # READABLE_SUFFIX file, returning write_file()'s True/False result.
    return write_readable(args.test_name, orig, doc_options)
# Program description handed to the argparse parser built in main().
DESCRIPTION = 'Create a readability regression test case.'
def main():
    """
    Command-line entry point.

    Builds a parser with two subcommands, 'create' (handled by create())
    and 'genbench' (handled by genbench()), parses the command line and
    calls the selected handler with the parsed arguments. If the handler
    returns a falsy value, a notice is printed.
    """
    top_parser = argparse.ArgumentParser(description=DESCRIPTION)
    commands = top_parser.add_subparsers(help='available subcommands')

    # 'create' subcommand: three positional arguments, registered in order.
    create_cmd = commands.add_parser(
        'create',
        help='create an entirely new test',
    )
    create_positionals = (
        ('url', 'url', 'the url for which to generate a test'),
        ('test_name', 'test-name', 'the name of the test'),
        ('test_description', 'test-description',
         'the description of the test'),
    )
    for dest, metavar, help_text in create_positionals:
        create_cmd.add_argument(dest, metavar=metavar, help=help_text)
    create_cmd.set_defaults(func=create)

    # 'genbench' subcommand: a test name plus an optional --refetch flag.
    genbench_cmd = commands.add_parser(
        'genbench',
        help='regenerate the benchmark for an existing test',
    )
    genbench_cmd.add_argument(
        'test_name',
        metavar='test-name',
        help='the name of the test',
    )
    genbench_cmd.add_argument(
        '--refetch',
        dest='refetch',
        action='store_true',
        help='if set, original html is refetched from the url',
    )
    genbench_cmd.set_defaults(func=genbench)

    parsed = top_parser.parse_args()
    # Each subcommand stored its handler under 'func' via set_defaults().
    succeeded = parsed.func(parsed)
    if not succeeded:
        print('test was not fully generated')
# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()