Mercurial > p > roundup > code
diff roundup/anypy/strings.py @ 5416:56c9bcdea47f
Python 3 preparation: unicode.
This patch introduces roundup/anypy/strings.py, which has a comment
explaining the string representations generally used and common
functions to handle the required conversions. Places in the code that
explicitly reference the "unicode" type / built-in function are
generally changed to use the new functions (or, in a few places where
those new functions don't seem to fit well, other approaches such as
references to type(u'') or use of the codecs module). This patch does
not generally attempt to address text conversions in any places not
currently referencing the "unicode" type (although
scripts/import_sf.py is changed to use binary I/O in places, because
fixing the "unicode" reference did not seem coherent otherwise).
| author | Joseph Myers <jsm@polyomino.org.uk> |
|---|---|
| date | Wed, 25 Jul 2018 09:05:58 +0000 |
| parents | |
| children | c749d6795bc2 |
line wrap: on
line diff
# File: roundup/anypy/strings.py
#
# Roundup represents text internally using the native Python str type.
# In Python 3, these are Unicode strings. In Python 2, these are
# encoded using UTF-8, and the Python 2 unicode type is only used in a
# few places, generally for interacting with external modules
# requiring that type to be used.

import sys

# True when the running interpreter's major version is greater than 2.
# Every helper below branches on this flag; the Python 2-only name
# ``unicode`` is only ever evaluated when the flag is false.
_py3 = sys.version_info[0] > 2


def b2s(b):
    """Convert a UTF-8 encoded bytes object to the internal string format.

    On Python 3 the bytes are decoded as UTF-8; on Python 2 the object is
    returned unchanged.
    """
    if _py3:
        return b.decode('utf-8')
    return b


def s2b(s):
    """Convert a string object to UTF-8 encoded bytes.

    On Python 3 the string is encoded as UTF-8; on Python 2 the object is
    returned unchanged.
    """
    if _py3:
        return s.encode('utf-8')
    return s


def s2u(s, errors='strict'):
    """Convert a string object to a Unicode string.

    ``errors`` is the error-handling scheme handed to ``unicode()`` on
    Python 2.  On Python 3 the string is returned unchanged and
    ``errors`` is not used.
    """
    if _py3:
        return s
    return unicode(s, 'utf-8', errors)


def u2s(u):
    """Convert a Unicode string to the internal string format.

    On Python 3 the string is returned unchanged; on Python 2 it is
    encoded as UTF-8.
    """
    if _py3:
        return u
    return u.encode('utf-8')


def us2u(s, errors='strict'):
    """Convert a string or Unicode string to a Unicode string.

    ``errors`` is the error-handling scheme handed to ``unicode()`` when
    a Python 2 non-unicode string has to be decoded; otherwise the
    argument is returned unchanged and ``errors`` is not used.
    """
    # ``or`` short-circuits, so ``unicode`` is never looked up on Python 3.
    if _py3 or isinstance(s, unicode):
        return s
    return unicode(s, 'utf-8', errors)


def us2s(u):
    """Convert a string or Unicode string to the internal string format.

    Only a Python 2 ``unicode`` object is converted (encoded as UTF-8);
    anything else is returned unchanged.
    """
    # ``and`` short-circuits, so ``unicode`` is never looked up on Python 3.
    if not _py3 and isinstance(u, unicode):
        return u.encode('utf-8')
    return u


def uany2s(u):
    """Convert a Unicode string or other object to the internal string format.

    Objects that are not Unicode strings are passed to str().
    """
    # ``and`` short-circuits, so ``unicode`` is never looked up on Python 3.
    if not _py3 and isinstance(u, unicode):
        return u.encode('utf-8')
    return str(u)


def is_us(s):
    """Return whether an object is a string or Unicode string."""
    if _py3:
        return isinstance(s, str)
    return isinstance(s, (str, unicode))
