Mercurial > p > roundup > code
annotate roundup/anypy/strings.py @ 5416:56c9bcdea47f
Python 3 preparation: unicode.
This patch introduces roundup/anypy/strings.py, which has a comment
explaining the string representations generally used and common
functions to handle the required conversions. Places in the code that
explicitly reference the "unicode" type / built-in function are
generally changed to use the new functions (or, in a few places where
those new functions don't seem to fit well, other approaches such as
references to type(u'') or use of the codecs module). This patch does
not generally attempt to address text conversions in any places not
currently referencing the "unicode" type (although
scripts/import_sf.py is made to use binary I/O in places as fixing the
"unicode" reference didn't seem coherent otherwise).
| author | Joseph Myers <jsm@polyomino.org.uk> |
|---|---|
| date | Wed, 25 Jul 2018 09:05:58 +0000 |
| parents | |
| children | c749d6795bc2 |
| rev | line source |
|---|---|
|
5416
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
1 # Roundup represents text internally using the native Python str type. |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
2 # In Python 3, these are Unicode strings. In Python 2, these are |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
3 # encoded using UTF-8, and the Python 2 unicode type is only used in a |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
4 # few places, generally for interacting with external modules |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
5 # requiring that type to be used. |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
6 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
7 import sys |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
8 _py3 = sys.version_info[0] > 2 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
9 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
10 def b2s(b): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
11 """Convert a UTF-8 encoded bytes object to the internal string format.""" |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
12 if _py3: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
13 return b.decode('utf-8') |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
14 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
15 return b |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
16 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
17 def s2b(s): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
18 """Convert a string object to UTF-8 encoded bytes.""" |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
19 if _py3: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
20 return s.encode('utf-8') |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
21 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
22 return s |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
23 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
24 def s2u(s, errors='strict'): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
25 """Convert a string object to a Unicode string.""" |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
26 if _py3: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
27 return s |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
28 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
29 return unicode(s, 'utf-8', errors) |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
30 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
31 def u2s(u): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
32 """Convert a Unicode string to the internal string format.""" |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
33 if _py3: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
34 return u |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
35 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
36 return u.encode('utf-8') |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
37 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
38 def us2u(s, errors='strict'): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
39 """Convert a string or Unicode string to a Unicode string.""" |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
40 if _py3: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
41 return s |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
42 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
43 if isinstance(s, unicode): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
44 return s |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
45 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
46 return unicode(s, 'utf-8', errors) |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
47 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
48 def us2s(u): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
49 """Convert a string or Unicode string to the internal string format.""" |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
50 if _py3: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
51 return u |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
52 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
53 if isinstance(u, unicode): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
54 return u.encode('utf-8') |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
55 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
56 return u |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
57 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
58 def uany2s(u): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
59 """Convert a Unicode string or other object to the internal string format. |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
60 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
61 Objects that are not Unicode strings are passed to str().""" |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
62 if _py3: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
63 return str(u) |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
64 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
65 if isinstance(u, unicode): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
66 return u.encode('utf-8') |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
67 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
68 return str(u) |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
69 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
70 def is_us(s): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
71 """Return whether an object is a string or Unicode string.""" |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
72 if _py3: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
73 return isinstance(s, str) |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
74 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
75 return isinstance(s, str) or isinstance(s, unicode) |
