comparison roundup/msgfmt.py @ 5450:f2fade4552c5

Replaced msgfmt.py with the latest version supporting Python 3; fixed setup scripts for Python 3.
author Christof Meerwald <cmeerw@cmeerw.org>
date Sat, 21 Jul 2018 16:29:20 +0100
parents 23b8e6067f7c
children 4d2e1fa03f0f
comparison
legend: equal, deleted, inserted, replaced
5449:ddf1cf299ebc 5450:f2fade4552c5
1 #! /usr/bin/env python 1 #! /usr/bin/env python
2 # -*- coding: iso-8859-1 -*- 2 # -*- coding: iso-8859-1 -*-
3 # Written by Martin v. Löwis <loewis@informatik.hu-berlin.de> 3 # Written by Martin v. Loewis <loewis@informatik.hu-berlin.de>
4 # Plural forms support added by alexander smishlajev <alex@tycobka.lv> 4 #
5 # Changed by Christian 'Tiran' Heimes <tiran@cheimes.de> for the placeless
6 # translation service (PTS) of Zope
7 #
8 # Fixed some bugs and updated to support msgctxt
9 # by Hanno Schlichting <hanno@hannosch.eu>
5 10
6 """Generate binary message catalog from textual translation description. 11 """Generate binary message catalog from textual translation description.
7 12
8 This program converts a textual Uniforum-style message catalog (.po file) into 13 This program converts a textual Uniforum-style message catalog (.po file) into
9 a binary GNU catalog (.mo file). This is essentially the same function as the 14 a binary GNU catalog (.mo file). This is essentially the same function as the
10 GNU msgfmt program, however, it is a simpler implementation. 15 GNU msgfmt program, however, it is a simpler implementation.
11 16
12 Usage: msgfmt.py [OPTIONS] filename.po 17 This file was taken from Python-2.3.2/Tools/i18n and altered in several ways.
13 18 Now you can simply use it from another python module:
14 Options: 19
15 -o file 20 from msgfmt import Msgfmt
16 --output-file=file 21 mo = Msgfmt(po).get()
17 Specify the output file to write to. If omitted, output will go to a 22
18 file named filename.mo (based off the input file name). 23 where po is path to a po file as string, an opened po file ready for reading or
19 24 a list of strings (readlines of a po file) and mo is the compiled mo file as
20 -h 25 binary string.
21 --help 26
22 Print this message and exit. 27 Exceptions:
23 28
24 -V 29 * IOError if the file couldn't be read
25 --version 30
26 Display version information and exit. 31 * msgfmt.PoSyntaxError if the po file has syntax errors
27 """ 32 """
28 33
29 from __future__ import print_function 34 import array
35 from ast import literal_eval
36 import codecs
37 from email.parser import HeaderParser
38 import struct
30 import sys 39 import sys
31 import os 40
32 import getopt 41 PY3 = sys.version_info[0] == 3
33 import struct 42 if PY3:
34 import array 43 def header_charset(s):
35 44 p = HeaderParser()
36 __version__ = "1.1" 45 return p.parsestr(s).get_content_charset()
37 46
38 MESSAGES = {} 47 import io
39 48 BytesIO = io.BytesIO
40 49 FILE_TYPE = io.IOBase
41 50 else:
42 def usage(code, msg=''): 51 def header_charset(s):
43 print(__doc__, file=sys.stderr) 52 p = HeaderParser()
44 if msg: 53 return p.parsestr(s.encode('utf-8', 'ignore')).get_content_charset()
45 print(msg, file=sys.stderr) 54
46 sys.exit(code) 55 from cStringIO import StringIO as BytesIO
47 56 FILE_TYPE = file
48 57
49 58
50 def add(id, str, fuzzy): 59 class PoSyntaxError(Exception):
51 "Add a non-fuzzy translation to the dictionary." 60 """ Syntax error in a po file """
52 global MESSAGES 61
53 if not fuzzy and str and not str.startswith('\0'): 62 def __init__(self, msg):
54 MESSAGES[id] = str 63 self.msg = msg
55 64
56 65 def __str__(self):
57 66 return 'Po file syntax error: %s' % self.msg
58 def generate(): 67
59 "Return the generated output." 68
60 global MESSAGES 69 class Msgfmt:
61 # the keys are sorted in the .mo file 70
62 keys = sorted(MESSAGES.keys()) 71 def __init__(self, po, name='unknown'):
63 offsets = [] 72 self.po = po
64 ids = strs = '' 73 self.name = name
65 for id in keys: 74 self.messages = {}
66 # For each string, we need size and file offset. Each string is NUL 75 self.openfile = False
67 # terminated; the NUL does not count into the size. 76 # Start off assuming latin-1, so everything decodes without failure,
68 offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id]))) 77 # until we know the exact encoding
69 ids += id + '\0' 78 self.encoding = 'latin-1'
70 strs += MESSAGES[id] + '\0' 79
71 output = '' 80 def readPoData(self):
72 # The header is 7 32-bit unsigned integers. We don't use hash tables, so 81 """ read po data from self.po and return an iterator """
73 # the keys start right after the index tables. 82 output = []
74 # translated string. 83 if isinstance(self.po, str):
75 keystart = 7*4+16*len(keys) 84 output = open(self.po, 'rb')
76 # and the values start after the keys 85 elif isinstance(self.po, FILE_TYPE):
77 valuestart = keystart + len(ids) 86 self.po.seek(0)
78 koffsets = [] 87 self.openfile = True
79 voffsets = [] 88 output = self.po
80 # The string table first has the list of keys, then the list of values. 89 elif isinstance(self.po, list):
81 # Each entry has first the size of the string, then the file offset. 90 output = self.po
82 for o1, l1, o2, l2 in offsets: 91 if not output:
83 koffsets += [l1, o1+keystart] 92 raise ValueError("self.po is invalid! %s" % type(self.po))
84 voffsets += [l2, o2+valuestart] 93 if isinstance(output, FILE_TYPE):
85 offsets = koffsets + voffsets 94 # remove BOM from the start of the parsed input
86 output = struct.pack("Iiiiiii", 95 first = output.readline()
87 0x950412de, # Magic 96 if len(first) == 0:
88 0, # Version 97 return output.readlines()
89 len(keys), # # of entries 98 if first.startswith(codecs.BOM_UTF8):
90 7*4, # start of key index 99 first = first.lstrip(codecs.BOM_UTF8)
91 7*4+len(keys)*8, # start of value index 100 return [first] + output.readlines()
92 0, 0) # size and offset of hash table 101 return output
93 output += array.array("i", offsets).tostring() 102
94 output += ids 103 def add(self, context, id, string, fuzzy):
95 output += strs 104 "Add a non-empty and non-fuzzy translation to the dictionary."
96 return output 105 if string and not fuzzy:
97 106 # The context is put before the id and separated by a EOT char.
98 107 if context:
99 108 id = context + u'\x04' + id
100 def make(filename, outfile): 109 if not id:
101 ID = 1 110 # See whether there is an encoding declaration
102 STR = 2 111 charset = header_charset(string)
103 global MESSAGES 112 if charset:
104 MESSAGES = {} 113 # decode header in proper encoding
105 114 string = string.encode(self.encoding).decode(charset)
106 msgid = None 115 if not PY3:
107 msgstr = None 116 # undo damage done by literal_eval in Python 2.x
108 117 string = string.encode(self.encoding).decode(charset)
109 # Compute .mo name from .po name and arguments 118 self.encoding = charset
110 if filename.endswith('.po'): 119 self.messages[id] = string
111 infile = filename 120
112 else: 121 def generate(self):
113 infile = filename + '.po' 122 "Return the generated output."
114 if outfile is None: 123 # the keys are sorted in the .mo file
115 outfile = os.path.splitext(infile)[0] + '.mo' 124 keys = sorted(self.messages.keys())
116 125 offsets = []
117 try: 126 ids = strs = b''
118 lines = open(infile).readlines() 127 for id in keys:
119 except IOError as msg: 128 msg = self.messages[id].encode(self.encoding)
120 print(msg, file=sys.stderr) 129 id = id.encode(self.encoding)
121 sys.exit(1) 130 # For each string, we need size and file offset. Each string is
122 131 # NUL terminated; the NUL does not count into the size.
123 # remove UTF-8 Byte Order Mark, if any. 132 offsets.append((len(ids), len(id), len(strs),
124 # (UCS2 BOMs are not handled because messages in UCS2 cannot be handled) 133 len(msg)))
125 if lines[0].startswith('\xEF\xBB\xBF'): 134 ids += id + b'\0'
126 lines[0] = lines[0][3:] 135 strs += msg + b'\0'
127 136 output = b''
128 section = None 137 # The header is 7 32-bit unsigned integers. We don't use hash tables,
129 fuzzy = 0 138 # so the keys start right after the index tables.
130 139 keystart = 7 * 4 + 16 * len(keys)
131 # Parse the catalog 140 # and the values start after the keys
132 lno = 0 141 valuestart = keystart + len(ids)
133 for l in lines: 142 koffsets = []
134 lno += 1 143 voffsets = []
135 # If we get a comment line after a msgstr, this is a new entry 144 # The string table first has the list of keys, then the list of values.
136 if l[0] == '#' and section == STR: 145 # Each entry has first the size of the string, then the file offset.
137 add(msgid, msgstr, fuzzy) 146 for o1, l1, o2, l2 in offsets:
138 section = None 147 koffsets += [l1, o1 + keystart]
139 fuzzy = 0 148 voffsets += [l2, o2 + valuestart]
140 # Record a fuzzy mark 149 offsets = koffsets + voffsets
141 if l[:2] == '#,' and (l.find('fuzzy') >= 0): 150 # Even though we don't use a hashtable, we still set its offset to be
142 fuzzy = 1 151 # binary compatible with the gnu gettext format produced by:
143 # Skip comments 152 # msgfmt file.po --no-hash
144 if l[0] == '#': 153 output = struct.pack("Iiiiiii",
145 continue 154 0x950412de, # Magic
146 # Start of msgid_plural section, separate from singular form with \0 155 0, # Version
147 if l.startswith('msgid_plural'): 156 len(keys), # # of entries
148 msgid += '\0' 157 7 * 4, # start of key index
149 l = l[12:] 158 7 * 4 + len(keys) * 8, # start of value index
150 # Now we are in a msgid section, output previous section 159 0, keystart) # size and offset of hash table
151 elif l.startswith('msgid'): 160 if PY3:
152 if section == STR: 161 output += array.array("i", offsets).tobytes()
153 add(msgid, msgstr, fuzzy)
154 section = ID
155 l = l[5:]
156 msgid = msgstr = ''
157 # Now we are in a msgstr section
158 elif l.startswith('msgstr'):
159 section = STR
160 l = l[6:]
161 # Check for plural forms
162 if l.startswith('['):
163 # Separate plural forms with \0
164 if not l.startswith('[0]'):
165 msgstr += '\0'
166 # Ignore the index - must come in sequence
167 l = l[l.index(']') + 1:]
168 # Skip empty lines
169 l = l.strip()
170 if not l:
171 continue
172 # XXX: Does this always follow Python escape semantics?
173 l = eval(l)
174 if section == ID:
175 msgid += l
176 elif section == STR:
177 msgstr += l
178 else: 162 else:
179 print('Syntax error on %s:%d' % (infile, lno), 163 output += array.array("i", offsets).tostring()
180 'before:', file=sys.stderr) 164 output += ids
181 print(l, file=sys.stderr) 165 output += strs
182 sys.exit(1) 166 return output
183 # Add last entry 167
184 if section == STR: 168 def get(self):
185 add(msgid, msgstr, fuzzy) 169 """ """
186 170 self.read()
187 # Compute output 171 # Compute output
188 output = generate() 172 return self.generate()
189 173
190 try: 174 def read(self, header_only=False):
191 open(outfile,"wb").write(output) 175 """ """
192 except IOError as msg: 176 ID = 1
193 print(msg, file=sys.stderr) 177 STR = 2
194 178 CTXT = 3
195 179
196 180 section = None
197 def main(): 181 fuzzy = 0
198 try: 182 msgid = msgstr = msgctxt = u''
199 opts, args = getopt.getopt(sys.argv[1:], 'hVo:', 183
200 ['help', 'version', 'output-file=']) 184 # Parse the catalog
201 except getopt.error as msg: 185 lno = 0
202 usage(1, msg) 186 for l in self.readPoData():
203 187 l = l.decode(self.encoding)
204 outfile = None 188 lno += 1
205 # parse options 189 # If we get a comment line after a msgstr or a line starting with
206 for opt, arg in opts: 190 # msgid or msgctxt, this is a new entry
207 if opt in ('-h', '--help'): 191 if section == STR and (l[0] == '#' or (l[0] == 'm' and
208 usage(0) 192 (l.startswith('msgctxt') or l.startswith('msgid')))):
209 elif opt in ('-V', '--version'): 193 self.add(msgctxt, msgid, msgstr, fuzzy)
210 print("msgfmt.py", __version__, file=sys.stderr) 194 section = None
211 sys.exit(0) 195 fuzzy = 0
212 elif opt in ('-o', '--output-file'): 196 # If we only want the header we stop after the first message
213 outfile = arg 197 if header_only:
214 # do it 198 break
215 if not args: 199 # Record a fuzzy mark
216 print('No input file given', file=sys.stderr) 200 if l[:2] == '#,' and 'fuzzy' in l:
217 print("Try `msgfmt --help' for more information.", file=sys.stderr) 201 fuzzy = 1
218 return 202 # Skip comments
219 203 if l[0] == '#':
220 for filename in args: 204 continue
221 make(filename, outfile) 205 # Now we are in a msgctxt section
222 206 if l.startswith('msgctxt'):
223 207 section = CTXT
224 if __name__ == '__main__': 208 l = l[7:]
225 main() 209 msgctxt = u''
226 210 # Now we are in a msgid section, output previous section
227 # vim: set et sts=4 sw=4 : 211 elif (l.startswith('msgid') and
212 not l.startswith('msgid_plural')):
213 if section == STR:
214 self.add(msgid, msgstr, fuzzy)
215 section = ID
216 l = l[5:]
217 msgid = msgstr = u''
218 is_plural = False
219 # This is a message with plural forms
220 elif l.startswith('msgid_plural'):
221 if section != ID:
222 raise PoSyntaxError(
223 'msgid_plural not preceeded by '
224 'msgid on line %d of po file %s' %
225 (lno, repr(self.name)))
226 l = l[12:]
227 msgid += u'\0' # separator of singular and plural
228 is_plural = True
229 # Now we are in a msgstr section
230 elif l.startswith('msgstr'):
231 section = STR
232 if l.startswith('msgstr['):
233 if not is_plural:
234 raise PoSyntaxError(
235 'plural without msgid_plural '
236 'on line %d of po file %s' %
237 (lno, repr(self.name)))
238 l = l.split(']', 1)[1]
239 if msgstr:
240 # Separator of the various plural forms
241 msgstr += u'\0'
242 else:
243 if is_plural:
244 raise PoSyntaxError(
245 'indexed msgstr required for '
246 'plural on line %d of po file %s' %
247 (lno, repr(self.name)))
248 l = l[6:]
249 # Skip empty lines
250 l = l.strip()
251 if not l:
252 continue
253 # TODO: Does this always follow Python escape semantics?
254 try:
255 l = literal_eval(l)
256 except Exception as msg:
257 raise PoSyntaxError(
258 '%s (line %d of po file %s): \n%s' %
259 (msg, lno, repr(self.name), l))
260 if isinstance(l, bytes):
261 l = l.decode(self.encoding)
262 if section == CTXT:
263 msgctxt += l
264 elif section == ID:
265 msgid += l
266 elif section == STR:
267 msgstr += l
268 else:
269 raise PoSyntaxError(
270 'error on line %d of po file %s' %
271 (lno, repr(self.name)))
272
273 # Add last entry
274 if section == STR:
275 self.add(msgctxt, msgid, msgstr, fuzzy)
276
277 if self.openfile:
278 self.po.close()
279
280 def getAsFile(self):
281 return BytesIO(self.get())

Roundup Issue Tracker: http://roundup-tracker.org/