Mercurial > p > roundup > code
annotate roundup/anypy/strings.py @ 5417:c749d6795bc2
Python 3 preparation: unichr.
| author | Joseph Myers <jsm@polyomino.org.uk> |
|---|---|
| date | Wed, 25 Jul 2018 09:07:03 +0000 |
| parents | 56c9bcdea47f |
| children | 55f09ca366c4 |
| rev | line source |
|---|---|
|
5416
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
1 # Roundup represents text internally using the native Python str type. |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
2 # In Python 3, these are Unicode strings. In Python 2, these are |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
3 # encoded using UTF-8, and the Python 2 unicode type is only used in a |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
4 # few places, generally for interacting with external modules |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
5 # requiring that type to be used. |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
6 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
7 import sys |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
8 _py3 = sys.version_info[0] > 2 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
9 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
10 def b2s(b): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
11 """Convert a UTF-8 encoded bytes object to the internal string format.""" |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
12 if _py3: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
13 return b.decode('utf-8') |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
14 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
15 return b |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
16 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
17 def s2b(s): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
18 """Convert a string object to UTF-8 encoded bytes.""" |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
19 if _py3: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
20 return s.encode('utf-8') |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
21 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
22 return s |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
23 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
24 def s2u(s, errors='strict'): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
25 """Convert a string object to a Unicode string.""" |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
26 if _py3: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
27 return s |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
28 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
29 return unicode(s, 'utf-8', errors) |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
30 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
31 def u2s(u): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
32 """Convert a Unicode string to the internal string format.""" |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
33 if _py3: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
34 return u |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
35 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
36 return u.encode('utf-8') |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
37 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
38 def us2u(s, errors='strict'): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
39 """Convert a string or Unicode string to a Unicode string.""" |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
40 if _py3: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
41 return s |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
42 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
43 if isinstance(s, unicode): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
44 return s |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
45 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
46 return unicode(s, 'utf-8', errors) |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
47 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
48 def us2s(u): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
49 """Convert a string or Unicode string to the internal string format.""" |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
50 if _py3: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
51 return u |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
52 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
53 if isinstance(u, unicode): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
54 return u.encode('utf-8') |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
55 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
56 return u |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
57 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
58 def uany2s(u): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
59 """Convert a Unicode string or other object to the internal string format. |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
60 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
61 Objects that are not Unicode strings are passed to str().""" |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
62 if _py3: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
63 return str(u) |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
64 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
65 if isinstance(u, unicode): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
66 return u.encode('utf-8') |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
67 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
68 return str(u) |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
69 |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
70 def is_us(s): |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
71 """Return whether an object is a string or Unicode string.""" |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
72 if _py3: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
73 return isinstance(s, str) |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
74 else: |
|
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff
changeset
|
75 return isinstance(s, str) or isinstance(s, unicode) |
|
5417
c749d6795bc2
Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents:
5416
diff
changeset
|
76 |
|
c749d6795bc2
Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents:
5416
diff
changeset
|
77 def uchr(c): |
|
c749d6795bc2
Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents:
5416
diff
changeset
|
78 """Return the Unicode string containing the given character.""" |
|
c749d6795bc2
Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents:
5416
diff
changeset
|
79 if _py3: |
|
c749d6795bc2
Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents:
5416
diff
changeset
|
80 return chr(c) |
|
c749d6795bc2
Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents:
5416
diff
changeset
|
81 else: |
|
c749d6795bc2
Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents:
5416
diff
changeset
|
82 return unichr(c) |
