annotate roundup/anypy/strings.py @ 5417:c749d6795bc2

Python 3 preparation: unichr.
author Joseph Myers <jsm@polyomino.org.uk>
date Wed, 25 Jul 2018 09:07:03 +0000
parents 56c9bcdea47f
children 55f09ca366c4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5416
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
1 # Roundup represents text internally using the native Python str type.
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
2 # In Python 3, these are Unicode strings. In Python 2, these are
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
3 # encoded using UTF-8, and the Python 2 unicode type is only used in a
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
4 # few places, generally for interacting with external modules
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
5 # requiring that type to be used.
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
6
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
7 import sys
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
8 _py3 = sys.version_info[0] > 2
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
9
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
10 def b2s(b):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
11 """Convert a UTF-8 encoded bytes object to the internal string format."""
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
12 if _py3:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
13 return b.decode('utf-8')
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
14 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
15 return b
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
16
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
17 def s2b(s):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
18 """Convert a string object to UTF-8 encoded bytes."""
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
19 if _py3:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
20 return s.encode('utf-8')
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
21 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
22 return s
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
23
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
24 def s2u(s, errors='strict'):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
25 """Convert a string object to a Unicode string."""
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
26 if _py3:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
27 return s
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
28 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
29 return unicode(s, 'utf-8', errors)
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
30
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
31 def u2s(u):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
32 """Convert a Unicode string to the internal string format."""
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
33 if _py3:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
34 return u
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
35 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
36 return u.encode('utf-8')
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
37
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
38 def us2u(s, errors='strict'):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
39 """Convert a string or Unicode string to a Unicode string."""
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
40 if _py3:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
41 return s
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
42 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
43 if isinstance(s, unicode):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
44 return s
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
45 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
46 return unicode(s, 'utf-8', errors)
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
47
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
48 def us2s(u):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
49 """Convert a string or Unicode string to the internal string format."""
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
50 if _py3:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
51 return u
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
52 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
53 if isinstance(u, unicode):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
54 return u.encode('utf-8')
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
55 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
56 return u
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
57
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
58 def uany2s(u):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
59 """Convert a Unicode string or other object to the internal string format.
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
60
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
61 Objects that are not Unicode strings are passed to str()."""
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
62 if _py3:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
63 return str(u)
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
64 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
65 if isinstance(u, unicode):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
66 return u.encode('utf-8')
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
67 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
68 return str(u)
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
69
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
70 def is_us(s):
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
71 """Return whether an object is a string or Unicode string."""
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
72 if _py3:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
73 return isinstance(s, str)
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
74 else:
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
diff changeset
75 return isinstance(s, str) or isinstance(s, unicode)
5417
c749d6795bc2 Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5416
diff changeset
76
c749d6795bc2 Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5416
diff changeset
77 def uchr(c):
c749d6795bc2 Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5416
diff changeset
78 """Return the Unicode string containing the given character."""
c749d6795bc2 Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5416
diff changeset
79 if _py3:
c749d6795bc2 Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5416
diff changeset
80 return chr(c)
c749d6795bc2 Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5416
diff changeset
81 else:
c749d6795bc2 Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5416
diff changeset
82 return unichr(c)

Roundup Issue Tracker: http://roundup-tracker.org/