Mercurial > p > roundup > code
changeset 5450:f2fade4552c5
replaced msgfmt.py with latest version supporting Python 3
fixed setup scripts for Python 3
| author | Christof Meerwald <cmeerw@cmeerw.org> |
|---|---|
| date | Sat, 21 Jul 2018 16:29:20 +0100 |
| parents | ddf1cf299ebc |
| children | fe1bd8f12a9f |
| files | roundup/cgi/TAL/talgettext.py roundup/dist/command/build.py roundup/msgfmt.py |
| diffstat | 3 files changed, 254 insertions(+), 205 deletions(-) [+] |
line wrap: on
line diff
--- a/roundup/cgi/TAL/talgettext.py Thu Jul 19 22:24:12 2018 +0100 +++ b/roundup/cgi/TAL/talgettext.py Sat Jul 21 16:29:20 2018 +0100 @@ -66,12 +66,6 @@ NLSTR = '"\n"' -try: - True -except NameError: - True=1 - False=0 - def usage(code, msg=''): # Python 2.1 required print(__doc__, file=sys.stderr)
--- a/roundup/dist/command/build.py Thu Jul 19 22:24:12 2018 +0100 +++ b/roundup/dist/command/build.py Sat Jul 21 16:29:20 2018 +0100 @@ -49,7 +49,8 @@ _build_dst = os.path.join("build", _dst) command.mkpath(os.path.dirname(_build_dst)) command.announce("Compiling %s -> %s" % (_src, _build_dst)) - msgfmt.make(_src, _build_dst) + mo = msgfmt.Msgfmt(_src).get() + open(_build_dst, 'wb').write(mo) class build(base):
--- a/roundup/msgfmt.py Thu Jul 19 22:24:12 2018 +0100 +++ b/roundup/msgfmt.py Sat Jul 21 16:29:20 2018 +0100 @@ -1,227 +1,281 @@ #! /usr/bin/env python # -*- coding: iso-8859-1 -*- -# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de> -# Plural forms support added by alexander smishlajev <alex@tycobka.lv> +# Written by Martin v. Loewis <loewis@informatik.hu-berlin.de> +# +# Changed by Christian 'Tiran' Heimes <tiran@cheimes.de> for the placeless +# translation service (PTS) of Zope +# +# Fixed some bugs and updated to support msgctxt +# by Hanno Schlichting <hanno@hannosch.eu> """Generate binary message catalog from textual translation description. This program converts a textual Uniforum-style message catalog (.po file) into -a binary GNU catalog (.mo file). This is essentially the same function as the +a binary GNU catalog (.mo file). This is essentially the same function as the GNU msgfmt program, however, it is a simpler implementation. -Usage: msgfmt.py [OPTIONS] filename.po +This file was taken from Python-2.3.2/Tools/i18n and altered in several ways. +Now you can simply use it from another python module: -Options: - -o file - --output-file=file - Specify the output file to write to. If omitted, output will go to a - file named filename.mo (based off the input file name). + from msgfmt import Msgfmt + mo = Msgfmt(po).get() - -h - --help - Print this message and exit. +where po is path to a po file as string, an opened po file ready for reading or +a list of strings (readlines of a po file) and mo is the compiled mo file as +binary string. - -V - --version - Display version information and exit. +Exceptions: + + * IOError if the file couldn't be read + + * msgfmt.PoSyntaxError if the po file has syntax errors """ -from __future__ import print_function +import array +from ast import literal_eval +import codecs +from email.parser import HeaderParser +import struct import sys -import os -import getopt -import struct -import array - -__version__ = "1.1" -MESSAGES = {} - +PY3 = sys.version_info[0] == 3 +if PY3: + def header_charset(s): + p = HeaderParser() + return p.parsestr(s).get_content_charset() - -def usage(code, msg=''): - print(__doc__, file=sys.stderr) - if msg: - print(msg, file=sys.stderr) - sys.exit(code) + import io + BytesIO = io.BytesIO + FILE_TYPE = io.IOBase +else: + def header_charset(s): + p = HeaderParser() + return p.parsestr(s.encode('utf-8', 'ignore')).get_content_charset() + + from cStringIO import StringIO as BytesIO + FILE_TYPE = file - -def add(id, str, fuzzy): - "Add a non-fuzzy translation to the dictionary." - global MESSAGES - if not fuzzy and str and not str.startswith('\0'): - MESSAGES[id] = str - +class PoSyntaxError(Exception): + """ Syntax error in a po file """ - -def generate(): - "Return the generated output." - global MESSAGES - # the keys are sorted in the .mo file - keys = sorted(MESSAGES.keys()) - offsets = [] - ids = strs = '' - for id in keys: - # For each string, we need size and file offset. Each string is NUL - # terminated; the NUL does not count into the size. - offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id]))) - ids += id + '\0' - strs += MESSAGES[id] + '\0' - output = '' - # The header is 7 32-bit unsigned integers. We don't use hash tables, so - # the keys start right after the index tables. - # translated string. - keystart = 7*4+16*len(keys) - # and the values start after the keys - valuestart = keystart + len(ids) - koffsets = [] - voffsets = [] - # The string table first has the list of keys, then the list of values. - # Each entry has first the size of the string, then the file offset. - for o1, l1, o2, l2 in offsets: - koffsets += [l1, o1+keystart] - voffsets += [l2, o2+valuestart] - offsets = koffsets + voffsets - output = struct.pack("Iiiiiii", - 0x950412de, # Magic - 0, # Version - len(keys), # # of entries - 7*4, # start of key index - 7*4+len(keys)*8, # start of value index - 0, 0) # size and offset of hash table - output += array.array("i", offsets).tostring() - output += ids - output += strs - return output + def __init__(self, msg): + self.msg = msg + + def __str__(self): + return 'Po file syntax error: %s' % self.msg - -def make(filename, outfile): - ID = 1 - STR = 2 - global MESSAGES - MESSAGES = {} +class Msgfmt: + + def __init__(self, po, name='unknown'): + self.po = po + self.name = name + self.messages = {} + self.openfile = False + # Start off assuming latin-1, so everything decodes without failure, + # until we know the exact encoding + self.encoding = 'latin-1' - msgid = None - msgstr = None + def readPoData(self): + """ read po data from self.po and return an iterator """ + output = [] + if isinstance(self.po, str): + output = open(self.po, 'rb') + elif isinstance(self.po, FILE_TYPE): + self.po.seek(0) + self.openfile = True + output = self.po + elif isinstance(self.po, list): + output = self.po + if not output: + raise ValueError("self.po is invalid! %s" % type(self.po)) + if isinstance(output, FILE_TYPE): + # remove BOM from the start of the parsed input + first = output.readline() + if len(first) == 0: + return output.readlines() + if first.startswith(codecs.BOM_UTF8): + first = first.lstrip(codecs.BOM_UTF8) + return [first] + output.readlines() + return output + + def add(self, context, id, string, fuzzy): + "Add a non-empty and non-fuzzy translation to the dictionary." + if string and not fuzzy: + # The context is put before the id and separated by a EOT char. + if context: + id = context + u'\x04' + id + if not id: + # See whether there is an encoding declaration + charset = header_charset(string) + if charset: + # decode header in proper encoding + string = string.encode(self.encoding).decode(charset) + if not PY3: + # undo damage done by literal_eval in Python 2.x + string = string.encode(self.encoding).decode(charset) + self.encoding = charset + self.messages[id] = string - # Compute .mo name from .po name and arguments - if filename.endswith('.po'): - infile = filename - else: - infile = filename + '.po' - if outfile is None: - outfile = os.path.splitext(infile)[0] + '.mo' + def generate(self): + "Return the generated output." + # the keys are sorted in the .mo file + keys = sorted(self.messages.keys()) + offsets = [] + ids = strs = b'' + for id in keys: + msg = self.messages[id].encode(self.encoding) + id = id.encode(self.encoding) + # For each string, we need size and file offset. Each string is + # NUL terminated; the NUL does not count into the size. + offsets.append((len(ids), len(id), len(strs), + len(msg))) + ids += id + b'\0' + strs += msg + b'\0' + output = b'' + # The header is 7 32-bit unsigned integers. We don't use hash tables, + # so the keys start right after the index tables. + keystart = 7 * 4 + 16 * len(keys) + # and the values start after the keys + valuestart = keystart + len(ids) + koffsets = [] + voffsets = [] + # The string table first has the list of keys, then the list of values. + # Each entry has first the size of the string, then the file offset. + for o1, l1, o2, l2 in offsets: + koffsets += [l1, o1 + keystart] + voffsets += [l2, o2 + valuestart] + offsets = koffsets + voffsets + # Even though we don't use a hashtable, we still set its offset to be + # binary compatible with the gnu gettext format produced by: + # msgfmt file.po --no-hash + output = struct.pack("Iiiiiii", + 0x950412de, # Magic + 0, # Version + len(keys), # # of entries + 7 * 4, # start of key index + 7 * 4 + len(keys) * 8, # start of value index + 0, keystart) # size and offset of hash table + if PY3: + output += array.array("i", offsets).tobytes() + else: + output += array.array("i", offsets).tostring() + output += ids + output += strs + return output - try: - lines = open(infile).readlines() - except IOError as msg: - print(msg, file=sys.stderr) - sys.exit(1) - - # remove UTF-8 Byte Order Mark, if any. - # (UCS2 BOMs are not handled because messages in UCS2 cannot be handled) - if lines[0].startswith('\xEF\xBB\xBF'): - lines[0] = lines[0][3:] - - section = None - fuzzy = 0 + def get(self): + """ """ + self.read() + # Compute output + return self.generate() - # Parse the catalog - lno = 0 - for l in lines: - lno += 1 - # If we get a comment line after a msgstr, this is a new entry - if l[0] == '#' and section == STR: - add(msgid, msgstr, fuzzy) - section = None - fuzzy = 0 - # Record a fuzzy mark - if l[:2] == '#,' and (l.find('fuzzy') >= 0): - fuzzy = 1 - # Skip comments - if l[0] == '#': - continue - # Start of msgid_plural section, separate from singular form with \0 - if l.startswith('msgid_plural'): - msgid += '\0' - l = l[12:] - # Now we are in a msgid section, output previous section - elif l.startswith('msgid'): - if section == STR: - add(msgid, msgstr, fuzzy) - section = ID - l = l[5:] - msgid = msgstr = '' - # Now we are in a msgstr section - elif l.startswith('msgstr'): - section = STR - l = l[6:] - # Check for plural forms - if l.startswith('['): - # Separate plural forms with \0 - if not l.startswith('[0]'): - msgstr += '\0' - # Ignore the index - must come in sequence - l = l[l.index(']') + 1:] - # Skip empty lines - l = l.strip() - if not l: - continue - # XXX: Does this always follow Python escape semantics? - l = eval(l) - if section == ID: - msgid += l - elif section == STR: - msgstr += l - else: - print('Syntax error on %s:%d' % (infile, lno), - 'before:', file=sys.stderr) - print(l, file=sys.stderr) - sys.exit(1) - # Add last entry - if section == STR: - add(msgid, msgstr, fuzzy) + def read(self, header_only=False): + """ """ + ID = 1 + STR = 2 + CTXT = 3 + + section = None + fuzzy = 0 + msgid = msgstr = msgctxt = u'' - # Compute output - output = generate() - - try: - open(outfile,"wb").write(output) - except IOError as msg: - print(msg, file=sys.stderr) - - - -def main(): - try: - opts, args = getopt.getopt(sys.argv[1:], 'hVo:', - ['help', 'version', 'output-file=']) - except getopt.error as msg: - usage(1, msg) + # Parse the catalog + lno = 0 + for l in self.readPoData(): + l = l.decode(self.encoding) + lno += 1 + # If we get a comment line after a msgstr or a line starting with + # msgid or msgctxt, this is a new entry + if section == STR and (l[0] == '#' or (l[0] == 'm' and + (l.startswith('msgctxt') or l.startswith('msgid')))): + self.add(msgctxt, msgid, msgstr, fuzzy) + section = None + fuzzy = 0 + # If we only want the header we stop after the first message + if header_only: + break + # Record a fuzzy mark + if l[:2] == '#,' and 'fuzzy' in l: + fuzzy = 1 + # Skip comments + if l[0] == '#': + continue + # Now we are in a msgctxt section + if l.startswith('msgctxt'): + section = CTXT + l = l[7:] + msgctxt = u'' + # Now we are in a msgid section, output previous section + elif (l.startswith('msgid') and + not l.startswith('msgid_plural')): + if section == STR: + self.add(msgid, msgstr, fuzzy) + section = ID + l = l[5:] + msgid = msgstr = u'' + is_plural = False + # This is a message with plural forms + elif l.startswith('msgid_plural'): + if section != ID: + raise PoSyntaxError( + 'msgid_plural not preceeded by ' + 'msgid on line %d of po file %s' % + (lno, repr(self.name))) + l = l[12:] + msgid += u'\0' # separator of singular and plural + is_plural = True + # Now we are in a msgstr section + elif l.startswith('msgstr'): + section = STR + if l.startswith('msgstr['): + if not is_plural: + raise PoSyntaxError( + 'plural without msgid_plural ' + 'on line %d of po file %s' % + (lno, repr(self.name))) + l = l.split(']', 1)[1] + if msgstr: + # Separator of the various plural forms + msgstr += u'\0' + else: + if is_plural: + raise PoSyntaxError( + 'indexed msgstr required for ' + 'plural on line %d of po file %s' % + (lno, repr(self.name))) + l = l[6:] + # Skip empty lines + l = l.strip() + if not l: + continue + # TODO: Does this always follow Python escape semantics? + try: + l = literal_eval(l) + except Exception as msg: + raise PoSyntaxError( + '%s (line %d of po file %s): \n%s' % + (msg, lno, repr(self.name), l)) + if isinstance(l, bytes): + l = l.decode(self.encoding) + if section == CTXT: + msgctxt += l + elif section == ID: + msgid += l + elif section == STR: + msgstr += l + else: + raise PoSyntaxError( + 'error on line %d of po file %s' % + (lno, repr(self.name))) - outfile = None - # parse options - for opt, arg in opts: - if opt in ('-h', '--help'): - usage(0) - elif opt in ('-V', '--version'): - print("msgfmt.py", __version__, file=sys.stderr) - sys.exit(0) - elif opt in ('-o', '--output-file'): - outfile = arg - # do it - if not args: - print('No input file given', file=sys.stderr) - print("Try `msgfmt --help' for more information.", file=sys.stderr) - return + # Add last entry + if section == STR: + self.add(msgctxt, msgid, msgstr, fuzzy) - for filename in args: - make(filename, outfile) - + if self.openfile: + self.po.close() -if __name__ == '__main__': - main() - -# vim: set et sts=4 sw=4 : + def getAsFile(self): + return BytesIO(self.get())
