changeset 5450:f2fade4552c5

replaced msgfmt.py with latest version supporting Python 3 fixed setup scripts for Python 3
author Christof Meerwald <cmeerw@cmeerw.org>
date Sat, 21 Jul 2018 16:29:20 +0100
parents ddf1cf299ebc
children fe1bd8f12a9f
files roundup/cgi/TAL/talgettext.py roundup/dist/command/build.py roundup/msgfmt.py
diffstat 3 files changed, 254 insertions(+), 205 deletions(-) [+]
line wrap: on
line diff
--- a/roundup/cgi/TAL/talgettext.py	Thu Jul 19 22:24:12 2018 +0100
+++ b/roundup/cgi/TAL/talgettext.py	Sat Jul 21 16:29:20 2018 +0100
@@ -66,12 +66,6 @@
 
 NLSTR = '"\n"'
 
-try:
-    True
-except NameError:
-    True=1
-    False=0
-
 def usage(code, msg=''):
     # Python 2.1 required
     print(__doc__, file=sys.stderr)
--- a/roundup/dist/command/build.py	Thu Jul 19 22:24:12 2018 +0100
+++ b/roundup/dist/command/build.py	Sat Jul 21 16:29:20 2018 +0100
@@ -49,7 +49,8 @@
         _build_dst = os.path.join("build", _dst)
         command.mkpath(os.path.dirname(_build_dst))
         command.announce("Compiling %s -> %s" % (_src, _build_dst))
-        msgfmt.make(_src, _build_dst)
+        mo = msgfmt.Msgfmt(_src).get()
+        open(_build_dst, 'wb').write(mo)
 
 
 class build(base):
--- a/roundup/msgfmt.py	Thu Jul 19 22:24:12 2018 +0100
+++ b/roundup/msgfmt.py	Sat Jul 21 16:29:20 2018 +0100
@@ -1,227 +1,281 @@
 #! /usr/bin/env python
 # -*- coding: iso-8859-1 -*-
-# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
-# Plural forms support added by alexander smishlajev <alex@tycobka.lv>
+# Written by Martin v. Loewis <loewis@informatik.hu-berlin.de>
+#
+# Changed by Christian 'Tiran' Heimes <tiran@cheimes.de> for the placeless
+# translation service (PTS) of Zope
+#
+# Fixed some bugs and updated to support msgctxt
+# by Hanno Schlichting <hanno@hannosch.eu>
 
 """Generate binary message catalog from textual translation description.
 
 This program converts a textual Uniforum-style message catalog (.po file) into
-a binary GNU catalog (.mo file).  This is essentially the same function as the
+a binary GNU catalog (.mo file). This is essentially the same function as the
 GNU msgfmt program, however, it is a simpler implementation.
 
-Usage: msgfmt.py [OPTIONS] filename.po
+This file was taken from Python-2.3.2/Tools/i18n and altered in several ways.
+Now you can simply use it from another python module:
 
-Options:
-    -o file
-    --output-file=file
-        Specify the output file to write to.  If omitted, output will go to a
-        file named filename.mo (based off the input file name).
+  from msgfmt import Msgfmt
+  mo = Msgfmt(po).get()
 
-    -h
-    --help
-        Print this message and exit.
+where po is path to a po file as string, an opened po file ready for reading or
+a list of strings (readlines of a po file) and mo is the compiled mo file as
+binary string.
 
-    -V
-    --version
-        Display version information and exit.
+Exceptions:
+
+  * IOError if the file couldn't be read
+
+  * msgfmt.PoSyntaxError if the po file has syntax errors
 """
 
-from __future__ import print_function
+import array
+from ast import literal_eval
+import codecs
+from email.parser import HeaderParser
+import struct
 import sys
-import os
-import getopt
-import struct
-import array
-
-__version__ = "1.1"
 
-MESSAGES = {}
-
+PY3 = sys.version_info[0] == 3  # True when running under Python 3
+if PY3:
+    def header_charset(s):  # charset named in the header text s, if any
+        p = HeaderParser()
+        return p.parsestr(s).get_content_charset()
 
-
-def usage(code, msg=''):
-    print(__doc__, file=sys.stderr)
-    if msg:
-        print(msg, file=sys.stderr)
-    sys.exit(code)
+    import io
+    BytesIO = io.BytesIO
+    FILE_TYPE = io.IOBase  # base class used to recognise file objects
+else:
+    def header_charset(s):  # Python 2 variant: parses the UTF-8 encoded text
+        p = HeaderParser()
+        return p.parsestr(s.encode('utf-8', 'ignore')).get_content_charset()
+
+    from cStringIO import StringIO as BytesIO
+    FILE_TYPE = file  # Python 2 built-in file type
 
 
-
-def add(id, str, fuzzy):
-    "Add a non-fuzzy translation to the dictionary."
-    global MESSAGES
-    if not fuzzy and str and not str.startswith('\0'):
-        MESSAGES[id] = str
-
+class PoSyntaxError(Exception):
+    """ Syntax error in a po file """
 
-
-def generate():
-    "Return the generated output."
-    global MESSAGES
-    # the keys are sorted in the .mo file
-    keys = sorted(MESSAGES.keys())
-    offsets = []
-    ids = strs = ''
-    for id in keys:
-        # For each string, we need size and file offset.  Each string is NUL
-        # terminated; the NUL does not count into the size.
-        offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id])))
-        ids += id + '\0'
-        strs += MESSAGES[id] + '\0'
-    output = ''
-    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
-    # the keys start right after the index tables.
-    # translated string.
-    keystart = 7*4+16*len(keys)
-    # and the values start after the keys
-    valuestart = keystart + len(ids)
-    koffsets = []
-    voffsets = []
-    # The string table first has the list of keys, then the list of values.
-    # Each entry has first the size of the string, then the file offset.
-    for o1, l1, o2, l2 in offsets:
-        koffsets += [l1, o1+keystart]
-        voffsets += [l2, o2+valuestart]
-    offsets = koffsets + voffsets
-    output = struct.pack("Iiiiiii",
-                         0x950412de,        # Magic
-                         0,                 # Version
-                         len(keys),         # # of entries
-                         7*4,               # start of key index
-                         7*4+len(keys)*8,   # start of value index
-                         0, 0)              # size and offset of hash table
-    output += array.array("i", offsets).tostring()
-    output += ids
-    output += strs
-    return output
+    def __init__(self, msg):
+        self.msg = msg  # description of the problem, shown by __str__
+
+    def __str__(self):
+        return 'Po file syntax error: %s' % self.msg
 
 
-
-def make(filename, outfile):
-    ID = 1
-    STR = 2
-    global MESSAGES
-    MESSAGES = {}
+class Msgfmt:
+
+    def __init__(self, po, name='unknown'):
+        self.po = po  # path, file object or list of lines (see readPoData)
+        self.name = name  # quoted in PoSyntaxError messages
+        self.messages = {}  # filled by add(): message id -> translation
+        self.openfile = False  # set by readPoData when po is a file object
+        # Start off assuming latin-1, so everything decodes without failure,
+        # until we know the exact encoding
+        self.encoding = 'latin-1'
 
-    msgid = None
-    msgstr = None
+    def readPoData(self):
+        """ read po data from self.po and return it as a list of lines """
+        if isinstance(self.po, str):
+            # a path: opened here, so it is also closed here once read
+            with open(self.po, 'rb') as po_file:
+                lines = po_file.readlines()
+        elif isinstance(self.po, FILE_TYPE):
+            # a file object from the caller; read() closes it afterwards
+            self.po.seek(0)
+            self.openfile = True
+            lines = self.po.readlines()
+        elif isinstance(self.po, list) and self.po:
+            # already split into lines: handed back unchanged
+            return self.po
+        else:
+            raise ValueError("self.po is invalid! %s" % type(self.po))
+        # remove BOM from the start of the parsed input. Slice the prefix
+        # off: lstrip() treats its argument as a set of bytes and could
+        # also eat the lead byte of the first real character.
+        if lines and lines[0].startswith(codecs.BOM_UTF8):
+            lines[0] = lines[0][len(codecs.BOM_UTF8):]
+        return lines
+
+    def add(self, context, id, string, fuzzy):
+        "Store a translation in self.messages unless it is empty or fuzzy."
+        if string and not fuzzy:
+            # The context is put before the id and separated by an EOT char.
+            if context:
+                id = context + u'\x04' + id
+            if not id:
+                # Empty id: see whether there is an encoding declaration
+                charset = header_charset(string)
+                if charset:
+                    # decode header in proper encoding
+                    string = string.encode(self.encoding).decode(charset)
+                    if not PY3:
+                        # undo damage done by literal_eval in Python 2.x
+                        string = string.encode(self.encoding).decode(charset)
+                    self.encoding = charset  # read()/generate() use it next
+            self.messages[id] = string
 
-    # Compute .mo name from .po name and arguments
-    if filename.endswith('.po'):
-        infile = filename
-    else:
-        infile = filename + '.po'
-    if outfile is None:
-        outfile = os.path.splitext(infile)[0] + '.mo'
+    def generate(self):
+        "Return the binary .mo data built from self.messages."
+        # the keys are sorted in the .mo file
+        keys = sorted(self.messages.keys())
+        offsets = []
+        ids = strs = b''
+        for id in keys:
+            msg = self.messages[id].encode(self.encoding)
+            id = id.encode(self.encoding)
+            # For each string, we need size and file offset. Each string is
+            # NUL terminated; the NUL does not count into the size.
+            offsets.append((len(ids), len(id), len(strs),
+                            len(msg)))
+            ids += id + b'\0'
+            strs += msg + b'\0'
+        output = b''  # placeholder; replaced by the packed header below
+        # The header is 7 32-bit unsigned integers. We don't use hash tables,
+        # so the keys start right after the index tables.
+        keystart = 7 * 4 + 16 * len(keys)
+        # and the values start after the keys
+        valuestart = keystart + len(ids)
+        koffsets = []
+        voffsets = []
+        # The string table first has the list of keys, then the list of values.
+        # Each entry has first the size of the string, then the file offset.
+        for o1, l1, o2, l2 in offsets:
+            koffsets += [l1, o1 + keystart]
+            voffsets += [l2, o2 + valuestart]
+        offsets = koffsets + voffsets
+        # Even though we don't use a hashtable, we still set its offset to be
+        # binary compatible with the gnu gettext format produced by:
+        # msgfmt file.po --no-hash
+        output = struct.pack("Iiiiiii",
+                             0x950412de,        # Magic
+                             0,                 # Version
+                             len(keys),         # # of entries
+                             7 * 4,             # start of key index
+                             7 * 4 + len(keys) * 8,  # start of value index
+                             0, keystart)       # size and offset of hash table
+        if PY3:  # array.tostring() is the Python 2 spelling of tobytes()
+            output += array.array("i", offsets).tobytes()
+        else:
+            output += array.array("i", offsets).tostring()
+        output += ids
+        output += strs
+        return output
 
-    try:
-        lines = open(infile).readlines()
-    except IOError as msg:
-        print(msg, file=sys.stderr)
-        sys.exit(1)
-
-    # remove UTF-8 Byte Order Mark, if any.
-    # (UCS2 BOMs are not handled because messages in UCS2 cannot be handled)
-    if lines[0].startswith('\xEF\xBB\xBF'):
-        lines[0] = lines[0][3:]
-
-    section = None
-    fuzzy = 0
+    def get(self):
+        """Parse the po input and return the compiled mo data."""
+        self.read()
+        # Compute output
+        return self.generate()
 
-    # Parse the catalog
-    lno = 0
-    for l in lines:
-        lno += 1
-        # If we get a comment line after a msgstr, this is a new entry
-        if l[0] == '#' and section == STR:
-            add(msgid, msgstr, fuzzy)
-            section = None
-            fuzzy = 0
-        # Record a fuzzy mark
-        if l[:2] == '#,' and (l.find('fuzzy') >= 0):
-            fuzzy = 1
-        # Skip comments
-        if l[0] == '#':
-            continue
-        # Start of msgid_plural section, separate from singular form with \0
-        if l.startswith('msgid_plural'):
-            msgid += '\0'
-            l = l[12:]
-        # Now we are in a msgid section, output previous section
-        elif l.startswith('msgid'):
-            if section == STR:
-                add(msgid, msgstr, fuzzy)
-            section = ID
-            l = l[5:]
-            msgid = msgstr = ''
-        # Now we are in a msgstr section
-        elif l.startswith('msgstr'):
-            section = STR
-            l = l[6:]
-            # Check for plural forms
-            if l.startswith('['):
-                # Separate plural forms with \0
-                if not l.startswith('[0]'):
-                    msgstr += '\0'
-                # Ignore the index - must come in sequence
-                l = l[l.index(']') + 1:]
-        # Skip empty lines
-        l = l.strip()
-        if not l:
-            continue
-        # XXX: Does this always follow Python escape semantics?
-        l = eval(l)
-        if section == ID:
-            msgid += l
-        elif section == STR:
-            msgstr += l
-        else:
-            print('Syntax error on %s:%d' % (infile, lno),
-                  'before:', file=sys.stderr)
-            print(l, file=sys.stderr)
-            sys.exit(1)
-    # Add last entry
-    if section == STR:
-        add(msgid, msgstr, fuzzy)
+    def read(self, header_only=False):
+        """Parse the po data into self.messages (header_only: first entry)."""
+        ID = 1  # parser states: the keyword the current line belongs to
+        STR = 2
+        CTXT = 3
+
+        section = None  # no keyword seen yet
+        fuzzy = 0
+        msgid = msgstr = msgctxt = u''
 
-    # Compute output
-    output = generate()
-
-    try:
-        open(outfile,"wb").write(output)
-    except IOError as msg:
-        print(msg, file=sys.stderr)
-
-
-
-def main():
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
-                                   ['help', 'version', 'output-file='])
-    except getopt.error as msg:
-        usage(1, msg)
+        # Parse the catalog
+        lno = 0
+        is_plural = False  # no msgid seen yet; the msgstr checks read this
+        for l in self.readPoData():
+            l = l.decode(self.encoding)
+            lno += 1
+            # If we get a comment line after a msgstr or a line starting with
+            # msgid or msgctxt, this is a new entry
+            if section == STR and (l[0] == '#' or (l[0] == 'm' and
+               (l.startswith('msgctxt') or l.startswith('msgid')))):
+                self.add(msgctxt, msgid, msgstr, fuzzy)
+                msgctxt = u''  # a context belongs to one entry only
+                section = None
+                fuzzy = 0
+                # If we only want the header we stop after the first message
+                if header_only:
+                    break
+            # Record a fuzzy mark
+            if l[:2] == '#,' and 'fuzzy' in l:
+                fuzzy = 1
+            # Skip comments
+            if l[0] == '#':
+                continue
+            # Now we are in a msgctxt section
+            if l.startswith('msgctxt'):
+                section = CTXT
+                l = l[7:]
+                msgctxt = u''
+            # Now in a msgid section; any previous entry was stored above
+            elif l.startswith('msgid') and not l.startswith('msgid_plural'):
+                section = ID
+                l = l[5:]
+                msgid = msgstr = u''
+                is_plural = False
+            # This is a message with plural forms
+            elif l.startswith('msgid_plural'):
+                if section != ID:
+                    raise PoSyntaxError(
+                        'msgid_plural not preceeded by '
+                        'msgid on line %d of po file %s' %
+                        (lno, repr(self.name)))
+                l = l[12:]
+                msgid += u'\0'  # separator of singular and plural
+                is_plural = True
+            # Now we are in a msgstr section
+            elif l.startswith('msgstr'):
+                section = STR
+                if l.startswith('msgstr['):
+                    if not is_plural:
+                        raise PoSyntaxError(
+                            'plural without msgid_plural '
+                            'on line %d of po file %s' %
+                            (lno, repr(self.name)))
+                    l = l.split(']', 1)[1]
+                    if msgstr:
+                        # Separator of the various plural forms
+                        msgstr += u'\0'
+                else:
+                    if is_plural:
+                        raise PoSyntaxError(
+                            'indexed msgstr required for '
+                            'plural on line %d of po file %s' %
+                            (lno, repr(self.name)))
+                    l = l[6:]
+            # Skip empty lines
+            l = l.strip()
+            if not l:
+                continue
+            # TODO: Does this always follow Python escape semantics?
+            try:
+                l = literal_eval(l)
+            except Exception as msg:
+                raise PoSyntaxError(
+                    '%s (line %d of po file %s): \n%s' %
+                    (msg, lno, repr(self.name), l))
+            if isinstance(l, bytes):
+                l = l.decode(self.encoding)
+            if section == CTXT:
+                msgctxt += l
+            elif section == ID:
+                msgid += l
+            elif section == STR:
+                msgstr += l
+            else:
+                raise PoSyntaxError(
+                    'error on line %d of po file %s' %
+                    (lno, repr(self.name)))
 
-    outfile = None
-    # parse options
-    for opt, arg in opts:
-        if opt in ('-h', '--help'):
-            usage(0)
-        elif opt in ('-V', '--version'):
-            print("msgfmt.py", __version__, file=sys.stderr)
-            sys.exit(0)
-        elif opt in ('-o', '--output-file'):
-            outfile = arg
-    # do it
-    if not args:
-        print('No input file given', file=sys.stderr)
-        print("Try `msgfmt --help' for more information.", file=sys.stderr)
-        return
+        # Add last entry
+        if section == STR:
+            self.add(msgctxt, msgid, msgstr, fuzzy)
 
-    for filename in args:
-        make(filename, outfile)
-
+        if self.openfile:  # only set when the caller passed in a file object
+            self.po.close()
 
-if __name__ == '__main__':
-    main()
-
-# vim: set et sts=4 sw=4 :
+    def getAsFile(self):  # compiled mo data wrapped in a BytesIO object
+        return BytesIO(self.get())

Roundup Issue Tracker: http://roundup-tracker.org/