Skip to content

Commit 097fc5d

Browse files
committed
-
1 parent 4b4ec23 commit 097fc5d

File tree

14 files changed

+1749
-13
lines changed

14 files changed

+1749
-13
lines changed

source_py2/python_toolbox/cute_testing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import nose
77
import sys
88

9-
from python_toolbox.third_party import python_toolbox.third_party.unittest2
9+
from python_toolbox.third_party import unittest2
1010

1111
from python_toolbox import cute_inspect
1212
from python_toolbox import context_management
Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
"""Cache lines from files.
2+
3+
This is intended to read lines from modules imported -- hence if a filename
4+
is not found, it will look down the module search path for a file by
5+
that name.
6+
"""
7+
8+
import functools
9+
import io
10+
import sys
11+
import os
12+
import tokenize
13+
14+
__all__ = ["getline", "clearcache", "checkcache"]
15+
16+
def getline(filename, lineno, module_globals=None):
    """Return line *lineno* (1-based) of *filename*, or '' if out of range."""
    lines = getlines(filename, module_globals)
    if not 1 <= lineno <= len(lines):
        # Out-of-range line numbers are not an error; mirror linecache.
        return ''
    return lines[lineno - 1]
22+
23+
24+
# The cache.  Maps a filename to either a one-element tuple holding a thunk
# that will provide the source code (registered by lazycache()), or a
# 4-tuple (size, mtime, lines, fullname) once the file has been loaded.
cache = {}
29+
30+
31+
def clearcache():
    """Discard every cached entry, leaving the cache empty."""
    global cache
    cache = {}
36+
37+
38+
def getlines(filename, module_globals=None):
    """Return the list of source lines for *filename*, using the cache.

    Missing entries and lazy (one-element) entries are realised via
    updatecache().
    """
    entry = cache.get(filename)
    if entry is not None and len(entry) != 1:
        # Fully loaded entry: (size, mtime, lines, fullname).
        return entry[2]
    # Not cached yet, or only a lazy thunk is registered.
    return updatecache(filename, module_globals)
49+
50+
51+
def checkcache(filename=None):
    """Drop cache entries whose files have changed on disk.

    With no argument every cached file is examined; otherwise only
    *filename* is considered.  Stale or unreadable entries are removed.
    (This is not checked upon each getline() call!)
    """
    if filename is None:
        targets = list(cache.keys())
    elif filename in cache:
        targets = [filename]
    else:
        return

    for name in targets:
        entry = cache[name]
        if len(entry) == 1:
            # Lazy entry: no file on disk to compare against; keep it lazy.
            continue
        size, mtime, lines, fullname = entry
        if mtime is None:
            # Loaded via a __loader__; there is no mtime to go stale.
            continue
        try:
            stat = os.stat(fullname)
        except OSError:
            del cache[name]
            continue
        if size != stat.st_size or mtime != stat.st_mtime:
            del cache[name]
78+
79+
80+
def updatecache(filename, module_globals=None):
    """Update a cache entry and return its list of lines.
    If something's wrong, print a message, discard the cache entry,
    and return an empty list."""

    if filename in cache:
        # A fully-loaded entry is stale by assumption; drop it so it is
        # re-read below.  A lazy (one-element) entry is kept and realised.
        if len(cache[filename]) != 1:
            del cache[filename]
    if not filename or (filename.startswith('<') and filename.endswith('>')):
        # Pseudo-filenames like '<stdin>' have no backing file.
        return []

    fullname = filename
    try:
        stat = os.stat(fullname)
    except OSError:
        basename = filename

        # Realise a lazy loader based lookup if there is one
        # otherwise try to lookup right now.
        if lazycache(filename, module_globals):
            try:
                # The one-element entry holds a thunk returning the source.
                data = cache[filename][0]()
            except (ImportError, OSError):
                pass
            else:
                if data is None:
                    # No luck, the PEP302 loader cannot find the source
                    # for this module.
                    return []
                # mtime is None: __loader__-provided source never goes stale
                # (see checkcache).
                cache[filename] = (
                    len(data), None,
                    [line+'\n' for line in data.splitlines()], fullname
                )
                return cache[filename][2]

        # Try looking through the module search path, which is only useful
        # when handling a relative filename.
        if os.path.isabs(filename):
            return []

        for dirname in sys.path:
            try:
                fullname = os.path.join(dirname, basename)
            except (TypeError, AttributeError):
                # Not sufficiently string-like to do anything useful with.
                continue
            try:
                stat = os.stat(fullname)
                break
            except OSError:
                pass
        else:
            # Searched all of sys.path without finding the file.
            return []
    try:
        # Honour the PEP 263 coding cookie when decoding the source.
        with _tokenize_open(fullname) as fp:
            lines = fp.readlines()
    except OSError:
        return []
    if lines and not lines[-1].endswith('\n'):
        # Normalise: every cached line ends with a newline.
        lines[-1] += '\n'
    size, mtime = stat.st_size, stat.st_mtime
    cache[filename] = size, mtime, lines, fullname
    return lines
143+
144+
145+
def lazycache(filename, module_globals):
    """Seed the cache for *filename* using *module_globals*.

    The module's loader is asked for the source only when getlines() is
    eventually called, never immediately.  An existing cache entry is
    left untouched.

    :return: True if a lazy load is registered in the cache,
        otherwise False.  Registering requires a module loader with a
        get_source method, a cachable filename, and a filename that is
        not already cached.
    """
    if filename in cache:
        # Already present: report whether it is the lazy (one-element) kind.
        return len(cache[filename]) == 1
    if not filename or (filename.startswith('<') and filename.endswith('>')):
        return False
    # Try for a __loader__, if available
    if not module_globals or '__loader__' not in module_globals:
        return False
    name = module_globals.get('__name__')
    loader = module_globals['__loader__']
    get_source = getattr(loader, 'get_source', None)
    if name and get_source:
        cache[filename] = (functools.partial(get_source, name),)
        return True
    return False
176+
177+
178+
#### ---- avoiding having a tokenize2 backport for now ----
from codecs import lookup, BOM_UTF8
import re
# PEP 263 coding-cookie pattern, matched against raw bytes of a source line.
cookie_re = re.compile(r'^[ \t\f]*#.*coding[:=][ \t]*([-\w.]+)'.encode('utf8'))
# A line that is blank or comment-only (a cookie may still follow on line 2).
blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)'.encode('utf8'))
183+
184+
185+
def _tokenize_open(filename):
    """Open a file in read only mode using the encoding detected by
    _detect_encoding().

    Backport of ``tokenize.open``.  The raw binary buffer is closed if
    encoding detection raises (e.g. SyntaxError for a bad coding cookie)
    or wrapping fails, so the file descriptor is not leaked on error —
    the same fix CPython applied to tokenize.open.
    """
    buffer = io.open(filename, 'rb')
    try:
        encoding, lines = _detect_encoding(buffer.readline)
        # Rewind so the text wrapper re-reads the BOM/cookie lines itself.
        buffer.seek(0)
        text = io.TextIOWrapper(buffer, encoding, line_buffering=True)
        text.mode = 'r'
        return text
    except BaseException:
        buffer.close()
        raise
195+
196+
197+
def _get_normal_name(orig_enc):
198+
"""Imitates get_normal_name in tokenizer.c."""
199+
# Only care about the first 12 characters.
200+
enc = orig_enc[:12].lower().replace("_", "-")
201+
if enc == "utf-8" or enc.startswith("utf-8-"):
202+
return "utf-8"
203+
if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
204+
enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
205+
return "iso-8859-1"
206+
return orig_enc
207+
208+
209+
def _detect_encoding(readline):
    """
    The detect_encoding() function is used to detect the encoding that should
    be used to decode a Python source file. It requires one argument, readline,
    in the same way as the tokenize() generator.

    It will call readline a maximum of twice, and return the encoding used
    (as a string) and a list of any lines (left as bytes) it has read in.

    It detects the encoding from the presence of a utf-8 bom or an encoding
    cookie as specified in pep-0263. If both a bom and a cookie are present,
    but disagree, a SyntaxError will be raised. If the encoding cookie is an
    invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
    'utf-8-sig' is returned.

    If no encoding is specified, then the default of 'utf-8' will be returned.
    """
    try:
        # Used only to make error messages more helpful; file-like readers
        # expose .name via readline.__self__.
        filename = readline.__self__.name
    except AttributeError:
        filename = None
    bom_found = False
    encoding = None
    default = 'utf-8'
    def read_or_stop():
        # Treat an exhausted readline as end-of-file.
        try:
            return readline()
        except StopIteration:
            return b''

    def find_cookie(line):
        # Return the normalized cookie encoding for this line, or None.
        try:
            # Decode as UTF-8. Either the line is an encoding declaration,
            # in which case it should be pure ASCII, or it must be UTF-8
            # per default encoding.
            line_string = line.decode('utf-8')
        except UnicodeDecodeError:
            msg = "invalid or missing encoding declaration"
            if filename is not None:
                msg = '{0} for {1!r}'.format(msg, filename)
            raise SyntaxError(msg)

        match = cookie_re.match(line)
        if not match:
            return None
        encoding = _get_normal_name(match.group(1).decode('utf-8'))
        try:
            codec = lookup(encoding)
        except LookupError:
            # This behaviour mimics the Python interpreter
            if filename is None:
                msg = "unknown encoding: " + encoding
            else:
                msg = "unknown encoding for {!r}: {}".format(filename,
                                                             encoding)
            raise SyntaxError(msg)

        if bom_found:
            # A BOM forces utf-8; a disagreeing cookie is an error.
            if encoding != 'utf-8':
                # This behaviour mimics the Python interpreter
                if filename is None:
                    msg = 'encoding problem: utf-8'
                else:
                    msg = 'encoding problem for {!r}: utf-8'.format(filename)
                raise SyntaxError(msg)
            encoding += '-sig'
        return encoding

    first = read_or_stop()
    if first.startswith(BOM_UTF8):
        # Strip the 3-byte BOM and remember it for cookie cross-checking.
        bom_found = True
        first = first[3:]
        default = 'utf-8-sig'
    if not first:
        return default, []

    encoding = find_cookie(first)
    if encoding:
        return encoding, [first]
    if not blank_re.match(first):
        # First line is real code: a cookie on line 2 would be ignored
        # by the interpreter, so stop looking.
        return default, [first]

    second = read_or_stop()
    if not second:
        return default, [first]

    encoding = find_cookie(second)
    if encoding:
        return encoding, [first, second]

    return default, [first, second]
300+
301+

0 commit comments

Comments
 (0)