Mercurial > p > roundup > code
comparison roundup/msgfmt.py @ 7772:6a1c1cd69582
chore: 'id' var shadows builtin, lint markers; sort imports
Replaced 'id' variable with label and 'ids' variable with 'labels' for
consistency.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Sat, 02 Mar 2024 13:45:29 -0500 |
| parents | 4d2e1fa03f0f |
| children | a9bac0cb4019 |
comparison
equal
deleted
inserted
replaced
| 7771:4261449081be | 7772:6a1c1cd69582 |
|---|---|
| 30 | 30 |
| 31 * msgfmt.PoSyntaxError if the po file has syntax errors | 31 * msgfmt.PoSyntaxError if the po file has syntax errors |
| 32 """ | 32 """ |
| 33 | 33 |
| 34 import array | 34 import array |
| 35 from ast import literal_eval | |
| 36 import codecs | 35 import codecs |
| 37 from email.parser import HeaderParser | |
| 38 import struct | 36 import struct |
| 39 import sys | 37 import sys |
| | 38 from ast import literal_eval |
| | 39 from email.parser import HeaderParser |
| 40 | 40 |
| 41 PY3 = sys.version_info[0] == 3 | 41 PY3 = sys.version_info[0] == 3 |
| 42 if PY3: | 42 if PY3: |
| 43 def header_charset(s): | 43 def header_charset(s): |
| 44 p = HeaderParser() | 44 p = HeaderParser() |
| 53 return p.parsestr(s.encode('utf-8', 'ignore')).get_content_charset() | 53 return p.parsestr(s.encode('utf-8', 'ignore')).get_content_charset() |
| 54 | 54 |
| 55 from cStringIO import StringIO as BytesIO | 55 from cStringIO import StringIO as BytesIO |
| 56 # file is a type defined only under python 2. | 56 # file is a type defined only under python 2. |
| 57 # Flake8 when run in py3 flags this. | 57 # Flake8 when run in py3 flags this. |
| 58 FILE_TYPE = file # noqa: 821 | 58 FILE_TYPE = file # noqa: F821 |
| 59 | 59 |
| 60 | 60 |
| 61 class PoSyntaxError(Exception): | 61 class PoSyntaxError(Exception): |
| 62 """ Syntax error in a po file """ | 62 """ Syntax error in a po file """ |
| 63 | 63 |
| 100 if first.startswith(codecs.BOM_UTF8): | 100 if first.startswith(codecs.BOM_UTF8): |
| 101 first = first.lstrip(codecs.BOM_UTF8) | 101 first = first.lstrip(codecs.BOM_UTF8) |
| 102 return [first] + output.readlines() | 102 return [first] + output.readlines() |
| 103 return output | 103 return output |
| 104 | 104 |
| 105 def add(self, context, id, string, fuzzy): | 105 def add(self, context, label, string, fuzzy): |
| 106 "Add a non-empty and non-fuzzy translation to the dictionary." | 106 "Add a non-empty and non-fuzzy translation to the dictionary." |
| 107 if string and not fuzzy: | 107 if string and not fuzzy: |
| 108 # The context is put before the id and separated by a EOT char. | 108 # The context is put before the id and separated by a EOT char. |
| 109 if context: | 109 if context: |
| 110 id = context + u'\x04' + id | 110 label = context + u'\x04' + label |
| 111 if not id: | 111 if not label: |
| 112 # See whether there is an encoding declaration | 112 # See whether there is an encoding declaration |
| 113 charset = header_charset(string) | 113 charset = header_charset(string) |
| 114 if charset: | 114 if charset: |
| 115 # decode header in proper encoding | 115 # decode header in proper encoding |
| 116 string = string.encode(self.encoding).decode(charset) | 116 string = string.encode(self.encoding).decode(charset) |
| 117 if not PY3: | 117 if not PY3: |
| 118 # undo damage done by literal_eval in Python 2.x | 118 # undo damage done by literal_eval in Python 2.x |
| 119 string = string.encode(self.encoding).decode(charset) | 119 string = string.encode(self.encoding).decode(charset) |
| 120 self.encoding = charset | 120 self.encoding = charset |
| 121 self.messages[id] = string | 121 self.messages[label] = string |
| 122 | 122 |
| 123 def generate(self): | 123 def generate(self): |
| 124 "Return the generated output." | 124 "Return the generated output." |
| 125 # the keys are sorted in the .mo file | 125 # the keys are sorted in the .mo file |
| 126 keys = sorted(self.messages.keys()) | 126 keys = sorted(self.messages.keys()) |
| 127 offsets = [] | 127 offsets = [] |
| 128 ids = strs = b'' | 128 labels = strs = b'' |
| 129 for id in keys: | 129 for label in keys: |
| 130 msg = self.messages[id].encode(self.encoding) | 130 msg = self.messages[label].encode(self.encoding) |
| 131 id = id.encode(self.encoding) | 131 label = label.encode(self.encoding) |
| 132 # For each string, we need size and file offset. Each string is | 132 # For each string, we need size and file offset. Each string is |
| 133 # NUL terminated; the NUL does not count into the size. | 133 # NUL terminated; the NUL does not count into the size. |
| 134 offsets.append((len(ids), len(id), len(strs), | 134 offsets.append((len(labels), len(label), len(strs), |
| 135 len(msg))) | 135 len(msg))) |
| 136 ids += id + b'\0' | 136 labels += label + b'\0' |
| 137 strs += msg + b'\0' | 137 strs += msg + b'\0' |
| 138 output = b'' | 138 output = b'' |
| 139 # The header is 7 32-bit unsigned integers. We don't use hash tables, | 139 # The header is 7 32-bit unsigned integers. We don't use hash tables, |
| 140 # so the keys start right after the index tables. | 140 # so the keys start right after the index tables. |
| 141 keystart = 7 * 4 + 16 * len(keys) | 141 keystart = 7 * 4 + 16 * len(keys) |
| 142 # and the values start after the keys | 142 # and the values start after the keys |
| 143 valuestart = keystart + len(ids) | 143 valuestart = keystart + len(labels) |
| 144 koffsets = [] | 144 koffsets = [] |
| 145 voffsets = [] | 145 voffsets = [] |
| 146 # The string table first has the list of keys, then the list of values. | 146 # The string table first has the list of keys, then the list of values. |
| 147 # Each entry has first the size of the string, then the file offset. | 147 # Each entry has first the size of the string, then the file offset. |
| 148 for o1, l1, o2, l2 in offsets: | 148 for o1, l1, o2, l2 in offsets: |
| 161 0, keystart) # size and offset of hash table | 161 0, keystart) # size and offset of hash table |
| 162 if PY3: | 162 if PY3: |
| 163 output += array.array("i", offsets).tobytes() | 163 output += array.array("i", offsets).tobytes() |
| 164 else: | 164 else: |
| 165 output += array.array("i", offsets).tostring() | 165 output += array.array("i", offsets).tostring() |
| 166 output += ids | 166 output += labels |
| 167 output += strs | 167 output += strs |
| 168 return output | 168 return output |
| 169 | 169 |
| 170 def get(self): | 170 def get(self): |
| 171 """ """ | 171 """ """ |
| 182 section = None | 182 section = None |
| 183 fuzzy = 0 | 183 fuzzy = 0 |
| 184 msgid = msgstr = msgctxt = u'' | 184 msgid = msgstr = msgctxt = u'' |
| 185 | 185 |
| 186 # Parse the catalog | 186 # Parse the catalog |
| | 187 # ruff: noqa: E741 PLW2901 - 'l' var name ok; overwrite 'l' ok |
| 187 lno = 0 | 188 lno = 0 |
| 188 for l in self.readPoData(): | 189 for l in self.readPoData(): |
| 189 l = l.decode(self.encoding) | 190 l = l.decode(self.encoding) |
| 190 lno += 1 | 191 lno += 1 |
| 191 # If we get a comment line after a msgstr or a line starting with | 192 # If we get a comment line after a msgstr or a line starting with |
