Mercurial > p > roundup > code
comparison roundup/anypy/strings.py @ 6048:81990ac0b013
flake8: move import; whitespace changes; ignore undefined
global 'unicode' in py3
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Sat, 11 Jan 2020 16:38:38 -0500 |
| parents | bb7865241f8a |
| children | 82f870433b18 |
comparison
equal
deleted
inserted
replaced
| 6047:142553f58694 | 6048:81990ac0b013 |
|---|---|
| 3 # encoded using UTF-8, and the Python 2 unicode type is only used in a | 3 # encoded using UTF-8, and the Python 2 unicode type is only used in a |
| 4 # few places, generally for interacting with external modules | 4 # few places, generally for interacting with external modules |
| 5 # requiring that type to be used. | 5 # requiring that type to be used. |
| 6 | 6 |
| 7 import sys | 7 import sys |
| 8 import io | |
| 9 | |
| 8 _py3 = sys.version_info[0] > 2 | 10 _py3 = sys.version_info[0] > 2 |
| 9 | 11 |
| 10 import io | |
| 11 if _py3: | 12 if _py3: |
| 12 StringIO = io.StringIO | 13 StringIO = io.StringIO |
| 13 else: | 14 else: |
| 14 StringIO = io.BytesIO | 15 StringIO = io.BytesIO |
| 16 | |
| 15 | 17 |
| 16 def b2s(b): | 18 def b2s(b): |
| 17 """Convert a UTF-8 encoded bytes object to the internal string format.""" | 19 """Convert a UTF-8 encoded bytes object to the internal string format.""" |
| 18 if _py3: | 20 if _py3: |
| 19 return b.decode('utf-8') | 21 return b.decode('utf-8') |
| 20 else: | 22 else: |
| 21 return b | 23 return b |
| 22 | 24 |
| 25 | |
| 23 def s2b(s): | 26 def s2b(s): |
| 24 """Convert a string object to UTF-8 encoded bytes.""" | 27 """Convert a string object to UTF-8 encoded bytes.""" |
| 25 if _py3: | 28 if _py3: |
| 26 return s.encode('utf-8') | 29 return s.encode('utf-8') |
| 27 else: | 30 else: |
| 28 return s | 31 return s |
| 29 | 32 |
| 33 | |
| 30 def bs2b(s): | 34 def bs2b(s): |
| 31 """Convert a string object or UTF-8 encoded bytes to UTF-8 encoded bytes.""" | 35 """Convert a string object or UTF-8 encoded bytes to UTF-8 encoded bytes. |
| 36 """ | |
| 32 if _py3: | 37 if _py3: |
| 33 if isinstance(s, bytes): | 38 if isinstance(s, bytes): |
| 34 return s | 39 return s |
| 35 else: | 40 else: |
| 36 return s.encode('utf-8') | 41 return s.encode('utf-8') |
| 37 else: | 42 else: |
| 38 return s | 43 return s |
| 39 | 44 |
| 45 | |
| 40 def s2u(s, errors='strict'): | 46 def s2u(s, errors='strict'): |
| 41 """Convert a string object to a Unicode string.""" | 47 """Convert a string object to a Unicode string.""" |
| 42 if _py3: | 48 if _py3: |
| 43 return s | 49 return s |
| 44 else: | 50 else: |
| 45 return unicode(s, 'utf-8', errors) | 51 return unicode(s, 'utf-8', errors) # noqa: 821 |
| 52 | |
| 46 | 53 |
| 47 def u2s(u): | 54 def u2s(u): |
| 48 """Convert a Unicode string to the internal string format.""" | 55 """Convert a Unicode string to the internal string format.""" |
| 49 if _py3: | 56 if _py3: |
| 50 return u | 57 return u |
| 51 else: | 58 else: |
| 52 return u.encode('utf-8') | 59 return u.encode('utf-8') |
| 53 | 60 |
| 61 | |
| 54 def us2u(s, errors='strict'): | 62 def us2u(s, errors='strict'): |
| 55 """Convert a string or Unicode string to a Unicode string.""" | 63 """Convert a string or Unicode string to a Unicode string.""" |
| 56 if _py3: | 64 if _py3: |
| 57 return s | 65 return s |
| 58 else: | 66 else: |
| 59 if isinstance(s, unicode): | 67 if isinstance(s, unicode): # noqa: 821 |
| 60 return s | 68 return s |
| 61 else: | 69 else: |
| 62 return unicode(s, 'utf-8', errors) | 70 return unicode(s, 'utf-8', errors) # noqa: 821 |
| 71 | |
| 63 | 72 |
| 64 def us2s(u): | 73 def us2s(u): |
| 65 """Convert a string or Unicode string to the internal string format.""" | 74 """Convert a string or Unicode string to the internal string format.""" |
| 66 if _py3: | 75 if _py3: |
| 67 return u | 76 return u |
| 68 else: | 77 else: |
| 69 if isinstance(u, unicode): | 78 if isinstance(u, unicode): # noqa: 821 |
| 70 return u.encode('utf-8') | 79 return u.encode('utf-8') |
| 71 else: | 80 else: |
| 72 return u | 81 return u |
| 82 | |
| 73 | 83 |
| 74 def uany2s(u): | 84 def uany2s(u): |
| 75 """Convert a Unicode string or other object to the internal string format. | 85 """Convert a Unicode string or other object to the internal string format. |
| 76 | 86 |
| 77 Objects that are not Unicode strings are passed to str().""" | 87 Objects that are not Unicode strings are passed to str().""" |
| 78 if _py3: | 88 if _py3: |
| 79 return str(u) | 89 return str(u) |
| 80 else: | 90 else: |
| 81 if isinstance(u, unicode): | 91 if isinstance(u, unicode): # noqa: 821 |
| 82 return u.encode('utf-8') | 92 return u.encode('utf-8') |
| 83 else: | 93 else: |
| 84 return str(u) | 94 return str(u) |
| 95 | |
| 85 | 96 |
| 86 def is_us(s): | 97 def is_us(s): |
| 87 """Return whether an object is a string or Unicode string.""" | 98 """Return whether an object is a string or Unicode string.""" |
| 88 if _py3: | 99 if _py3: |
| 89 return isinstance(s, str) | 100 return isinstance(s, str) |
| 90 else: | 101 else: |
| 91 return isinstance(s, str) or isinstance(s, unicode) | 102 return isinstance(s, str) or isinstance(s, unicode) # noqa: 821 |
| 103 | |
| 92 | 104 |
| 93 def uchr(c): | 105 def uchr(c): |
| 94 """Return the Unicode string containing the given character.""" | 106 """Return the Unicode string containing the given character.""" |
| 95 if _py3: | 107 if _py3: |
| 96 return chr(c) | 108 return chr(c) |
| 97 else: | 109 else: |
| 98 return unichr(c) | 110 return unichr(c) # noqa: 821 |
| 99 | 111 |
| 100 # CSV files used for export and import represent strings in the style | 112 # CSV files used for export and import represent strings in the style |
| 101 # used by repr in Python 2; this means that each byte of the UTF-8 | 113 # used by repr in Python 2; this means that each byte of the UTF-8 |
| 102 # representation is represented by a \x escape if not a printable | 114 # representation is represented by a \x escape if not a printable |
| 103 # ASCII character. When such a representation is interpreted by eval | 115 # ASCII character. When such a representation is interpreted by eval |
| 106 # as ISO-8859-1 produces the correct byte-string which must then be | 118 # as ISO-8859-1 produces the correct byte-string which must then be |
| 107 # decoded as UTF-8 to produce the correct Unicode string. The same | 119 # decoded as UTF-8 to produce the correct Unicode string. The same |
| 108 # representations are also used for journal storage in RDBMS | 120 # representations are also used for journal storage in RDBMS |
| 109 # databases, so that the database can be compatible between Python 2 | 121 # databases, so that the database can be compatible between Python 2 |
| 110 # and Python 3. | 122 # and Python 3. |
| 123 | |
| 111 | 124 |
| 112 def repr_export(v): | 125 def repr_export(v): |
| 113 """Return a Python-2-style representation of a value for export to CSV.""" | 126 """Return a Python-2-style representation of a value for export to CSV.""" |
| 114 if _py3: | 127 if _py3: |
| 115 if isinstance(v, str): | 128 if isinstance(v, str): |
| 122 return '{%s}' % ', '.join(repr_vals) | 135 return '{%s}' % ', '.join(repr_vals) |
| 123 else: | 136 else: |
| 124 return repr(v) | 137 return repr(v) |
| 125 else: | 138 else: |
| 126 return repr(v) | 139 return repr(v) |
| 140 | |
| 127 | 141 |
| 128 def eval_import(s): | 142 def eval_import(s): |
| 129 """Evaluate a Python-2-style value imported from a CSV file.""" | 143 """Evaluate a Python-2-style value imported from a CSV file.""" |
| 130 if _py3: | 144 if _py3: |
| 131 v = eval(s) | 145 v = eval(s) |
