annotate roundup/anypy/strings.py @ 5416:56c9bcdea47f

Python 3 preparation: unicode. This patch introduces roundup/anypy/strings.py, which has a comment explaining the string representations generally used and common functions to handle the required conversions. Places in the code that explicitly reference the "unicode" type / built-in function are generally changed to use the new functions (or, in a few places where those new functions don't seem to fit well, other approaches such as references to type(u'') or use of the codecs module). This patch does not generally attempt to address text conversions in any places not currently referencing the "unicode" type (although scripts/import_sf.py is made to use binary I/O in places as fixing the "unicode" reference didn't seem coherent otherwise).
author Joseph Myers <jsm@polyomino.org.uk>
date Wed, 25 Jul 2018 09:05:58 +0000
parents
children c749d6795bc2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5416
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
1 # Roundup represents text internally using the native Python str type.
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
2 # In Python 3, these are Unicode strings. In Python 2, these are
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
3 # encoded using UTF-8, and the Python 2 unicode type is only used in a
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
4 # few places, generally for interacting with external modules
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
5 # requiring that type to be used.
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
6
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
7 import sys
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
8 _py3 = sys.version_info[0] > 2
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
9
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
10 def b2s(b):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
11 """Convert a UTF-8 encoded bytes object to the internal string format."""
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
12 if _py3:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
13 return b.decode('utf-8')
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
14 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
15 return b
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
16
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
17 def s2b(s):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
18 """Convert a string object to UTF-8 encoded bytes."""
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
19 if _py3:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
20 return s.encode('utf-8')
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
21 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
22 return s
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
23
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
24 def s2u(s, errors='strict'):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
25 """Convert a string object to a Unicode string."""
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
26 if _py3:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
27 return s
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
28 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
29 return unicode(s, 'utf-8', errors)
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
30
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
31 def u2s(u):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
32 """Convert a Unicode string to the internal string format."""
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
33 if _py3:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
34 return u
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
35 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
36 return u.encode('utf-8')
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
37
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
38 def us2u(s, errors='strict'):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
39 """Convert a string or Unicode string to a Unicode string."""
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
40 if _py3:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
41 return s
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
42 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
43 if isinstance(s, unicode):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
44 return s
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
45 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
46 return unicode(s, 'utf-8', errors)
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
47
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
48 def us2s(u):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
49 """Convert a string or Unicode string to the internal string format."""
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
50 if _py3:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
51 return u
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
52 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
53 if isinstance(u, unicode):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
54 return u.encode('utf-8')
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
55 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
56 return u
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
57
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
58 def uany2s(u):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
59 """Convert a Unicode string or other object to the internal string format.
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
60
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
61 Objects that are not Unicode strings are passed to str()."""
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
62 if _py3:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
63 return str(u)
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
64 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
65 if isinstance(u, unicode):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
66 return u.encode('utf-8')
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
67 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
68 return str(u)
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
69
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
70 def is_us(s):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
71 """Return whether an object is a string or Unicode string."""
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
72 if _py3:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
73 return isinstance(s, str)
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
74 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
75 return isinstance(s, str) or isinstance(s, unicode)

Roundup Issue Tracker: http://roundup-tracker.org/