Mercurial Repository: p/roundup/code: roundup/msgfmt.py comparison

comparison roundup/msgfmt.py @ 5450:f2fade4552c5

replaced msgfmt.py with latest version supporting Python 3; fixed setup scripts for Python 3

author	Christof Meerwald <cmeerw@cmeerw.org>
date	Sat, 21 Jul 2018 16:29:20 +0100
parents	23b8e6067f7c
children	4d2e1fa03f0f

comparison

equal deleted inserted replaced

-:ddf1cf299ebc
+:f2fade4552c5
 #! /usr/bin/env python
 # -*- coding: iso-8859-1 -*-
-# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
+# Written by Martin v. Loewis <loewis@informatik.hu-berlin.de>
-# Plural forms support added by alexander smishlajev <alex@tycobka.lv>
+#
+# Changed by Christian 'Tiran' Heimes <tiran@cheimes.de> for the placeless
+# translation service (PTS) of Zope
+#
+# Fixed some bugs and updated to support msgctxt
+# by Hanno Schlichting <hanno@hannosch.eu>
 """Generate binary message catalog from textual translation description.
 This program converts a textual Uniforum-style message catalog (.po file) into
-a binary GNU catalog (.mo file).  This is essentially the same function as the
+a binary GNU catalog (.mo file). This is essentially the same function as the
 GNU msgfmt program, however, it is a simpler implementation.
-Usage: msgfmt.py [OPTIONS] filename.po
+This file was taken from Python-2.3.2/Tools/i18n and altered in several ways.
+Now you can simply use it from another python module:
-Options:
--o file
+from msgfmt import Msgfmt
---output-file=file
+mo = Msgfmt(po).get()
-Specify the output file to write to.  If omitted, output will go to a
-file named filename.mo (based off the input file name).
+where po is path to a po file as string, an opened po file ready for reading or
+a list of strings (readlines of a po file) and mo is the compiled mo file as
--h
+binary string.
---help
-Print this message and exit.
+Exceptions:
--V
+* IOError if the file couldn't be read
---version
-Display version information and exit.
+* msgfmt.PoSyntaxError if the po file has syntax errors
 """
-from __future__ import print_function
+import array
+from ast import literal_eval
+import codecs
+from email.parser import HeaderParser
+import struct
 import sys
-import os
-import getopt
+PY3 = sys.version_info[0] == 3
-import struct
+if PY3:
-import array
+def header_charset(s):
+p = HeaderParser()
-__version__ = "1.1"
+return p.parsestr(s).get_content_charset()
-MESSAGES = {}
+import io
+BytesIO = io.BytesIO
+FILE_TYPE = io.IOBase
+else:
-def usage(code, msg=''):
+def header_charset(s):
-print(__doc__, file=sys.stderr)
+p = HeaderParser()
-if msg:
+return p.parsestr(s.encode('utf-8', 'ignore')).get_content_charset()
-print(msg, file=sys.stderr)
-sys.exit(code)
+from cStringIO import StringIO as BytesIO
+FILE_TYPE = file
-def add(id, str, fuzzy):
+class PoSyntaxError(Exception):
-"Add a non-fuzzy translation to the dictionary."
+""" Syntax error in a po file """
-global MESSAGES
-if not fuzzy and str and not str.startswith('\0'):
+def __init__(self, msg):
-MESSAGES[id] = str
+self.msg = msg
+def __str__(self):
+return 'Po file syntax error: %s' % self.msg
-def generate():
-"Return the generated output."
-global MESSAGES
+class Msgfmt:
-# the keys are sorted in the .mo file
-keys = sorted(MESSAGES.keys())
+def __init__(self, po, name='unknown'):
-offsets = []
+self.po = po
-ids = strs = ''
+self.name = name
-for id in keys:
+self.messages = {}
-# For each string, we need size and file offset.  Each string is NUL
+self.openfile = False
-# terminated; the NUL does not count into the size.
+# Start off assuming latin-1, so everything decodes without failure,
-offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id])))
+# until we know the exact encoding
-ids += id + '\0'
+self.encoding = 'latin-1'
-strs += MESSAGES[id] + '\0'
-output = ''
+def readPoData(self):
-# The header is 7 32-bit unsigned integers.  We don't use hash tables, so
+""" read po data from self.po and return an iterator """
-# the keys start right after the index tables.
+output = []
-# translated string.
+if isinstance(self.po, str):
-keystart = 7*4+16*len(keys)
+output = open(self.po, 'rb')
-# and the values start after the keys
+elif isinstance(self.po, FILE_TYPE):
-valuestart = keystart + len(ids)
+self.po.seek(0)
-koffsets = []
+self.openfile = True
-voffsets = []
+output = self.po
-# The string table first has the list of keys, then the list of values.
+elif isinstance(self.po, list):
-# Each entry has first the size of the string, then the file offset.
+output = self.po
-for o1, l1, o2, l2 in offsets:
+if not output:
-koffsets += [l1, o1+keystart]
+raise ValueError("self.po is invalid! %s" % type(self.po))
-voffsets += [l2, o2+valuestart]
+if isinstance(output, FILE_TYPE):
-offsets = koffsets + voffsets
+# remove BOM from the start of the parsed input
-output = struct.pack("Iiiiiii",
+first = output.readline()
-0x950412de,        # Magic
+if len(first) == 0:
-0,                 # Version
+return output.readlines()
-len(keys),         # # of entries
+if first.startswith(codecs.BOM_UTF8):
-7*4,               # start of key index
+first = first.lstrip(codecs.BOM_UTF8)
-7*4+len(keys)*8,   # start of value index
+return [first] + output.readlines()
-0, 0)              # size and offset of hash table
+return output
-output += array.array("i", offsets).tostring()
-output += ids
+def add(self, context, id, string, fuzzy):
-output += strs
+"Add a non-empty and non-fuzzy translation to the dictionary."
-return output
+if string and not fuzzy:
+# The context is put before the id and separated by a EOT char.
+if context:
+id = context + u'\x04' + id
-def make(filename, outfile):
+if not id:
-ID = 1
+# See whether there is an encoding declaration
-STR = 2
+charset = header_charset(string)
-global MESSAGES
+if charset:
-MESSAGES = {}
+# decode header in proper encoding
+string = string.encode(self.encoding).decode(charset)
-msgid = None
+if not PY3:
-msgstr = None
+# undo damage done by literal_eval in Python 2.x
+string = string.encode(self.encoding).decode(charset)
-# Compute .mo name from .po name and arguments
+self.encoding = charset
-if filename.endswith('.po'):
+self.messages[id] = string
-infile = filename
-else:
+def generate(self):
-infile = filename + '.po'
+"Return the generated output."
-if outfile is None:
+# the keys are sorted in the .mo file
-outfile = os.path.splitext(infile)[0] + '.mo'
+keys = sorted(self.messages.keys())
+offsets = []
-try:
+ids = strs = b''
-lines = open(infile).readlines()
+for id in keys:
-except IOError as msg:
+msg = self.messages[id].encode(self.encoding)
-print(msg, file=sys.stderr)
+id = id.encode(self.encoding)
-sys.exit(1)
+# For each string, we need size and file offset. Each string is
+# NUL terminated; the NUL does not count into the size.
-# remove UTF-8 Byte Order Mark, if any.
+offsets.append((len(ids), len(id), len(strs),
-# (UCS2 BOMs are not handled because messages in UCS2 cannot be handled)
+len(msg)))
-if lines[0].startswith('\xEF\xBB\xBF'):
+ids += id + b'\0'
-lines[0] = lines[0][3:]
+strs += msg + b'\0'
+output = b''
-section = None
+# The header is 7 32-bit unsigned integers. We don't use hash tables,
-fuzzy = 0
+# so the keys start right after the index tables.
+keystart = 7 * 4 + 16 * len(keys)
-# Parse the catalog
+# and the values start after the keys
-lno = 0
+valuestart = keystart + len(ids)
-for l in lines:
+koffsets = []
-lno += 1
+voffsets = []
-# If we get a comment line after a msgstr, this is a new entry
+# The string table first has the list of keys, then the list of values.
-if l[0] == '#' and section == STR:
+# Each entry has first the size of the string, then the file offset.
-add(msgid, msgstr, fuzzy)
+for o1, l1, o2, l2 in offsets:
-section = None
+koffsets += [l1, o1 + keystart]
-fuzzy = 0
+voffsets += [l2, o2 + valuestart]
-# Record a fuzzy mark
+offsets = koffsets + voffsets
-if l[:2] == '#,' and (l.find('fuzzy') >= 0):
+# Even though we don't use a hashtable, we still set its offset to be
-fuzzy = 1
+# binary compatible with the gnu gettext format produced by:
-# Skip comments
+# msgfmt file.po --no-hash
-if l[0] == '#':
+output = struct.pack("Iiiiiii",
-continue
+0x950412de,        # Magic
-# Start of msgid_plural section, separate from singular form with \0
+0,                 # Version
-if l.startswith('msgid_plural'):
+len(keys),         # # of entries
-msgid += '\0'
+7 * 4,             # start of key index
-l = l[12:]
+7 * 4 + len(keys) * 8,  # start of value index
-# Now we are in a msgid section, output previous section
+0, keystart)       # size and offset of hash table
-elif l.startswith('msgid'):
+if PY3:
-if section == STR:
+output += array.array("i", offsets).tobytes()
-add(msgid, msgstr, fuzzy)
-section = ID
-l = l[5:]
-msgid = msgstr = ''
-# Now we are in a msgstr section
-elif l.startswith('msgstr'):
-section = STR
-l = l[6:]
-# Check for plural forms
-if l.startswith('['):
-# Separate plural forms with \0
-if not l.startswith('[0]'):
-msgstr += '\0'
-# Ignore the index - must come in sequence
-l = l[l.index(']') + 1:]
-# Skip empty lines
-l = l.strip()
-if not l:
-continue
-# XXX: Does this always follow Python escape semantics?
-l = eval(l)
-if section == ID:
-msgid += l
-elif section == STR:
-msgstr += l
 else:
-print('Syntax error on %s:%d' % (infile, lno),
+output += array.array("i", offsets).tostring()
-'before:', file=sys.stderr)
+output += ids
-print(l, file=sys.stderr)
+output += strs
-sys.exit(1)
+return output
-# Add last entry
-if section == STR:
+def get(self):
-add(msgid, msgstr, fuzzy)
+""" """
+self.read()
 # Compute output
-output = generate()
+return self.generate()
-try:
+def read(self, header_only=False):
-open(outfile,"wb").write(output)
+""" """
-except IOError as msg:
+ID = 1
-print(msg, file=sys.stderr)
+STR = 2
+CTXT = 3
+section = None
-def main():
+fuzzy = 0
-try:
+msgid = msgstr = msgctxt = u''
-opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
-['help', 'version', 'output-file='])
+# Parse the catalog
-except getopt.error as msg:
+lno = 0
-usage(1, msg)
+for l in self.readPoData():
+l = l.decode(self.encoding)
-outfile = None
+lno += 1
-# parse options
+# If we get a comment line after a msgstr or a line starting with
-for opt, arg in opts:
+# msgid or msgctxt, this is a new entry
-if opt in ('-h', '--help'):
+if section == STR and (l[0] == '#' or (l[0] == 'm' and
-usage(0)
+(l.startswith('msgctxt') or l.startswith('msgid')))):
-elif opt in ('-V', '--version'):
+self.add(msgctxt, msgid, msgstr, fuzzy)
-print("msgfmt.py", __version__, file=sys.stderr)
+section = None
-sys.exit(0)
+fuzzy = 0
-elif opt in ('-o', '--output-file'):
+# If we only want the header we stop after the first message
-outfile = arg
+if header_only:
-# do it
+break
-if not args:
+# Record a fuzzy mark
-print('No input file given', file=sys.stderr)
+if l[:2] == '#,' and 'fuzzy' in l:
-print("Try `msgfmt --help' for more information.", file=sys.stderr)
+fuzzy = 1
-return
+# Skip comments
+if l[0] == '#':
-for filename in args:
+continue
-make(filename, outfile)
+# Now we are in a msgctxt section
+if l.startswith('msgctxt'):
+section = CTXT
-if __name__ == '__main__':
+l = l[7:]
-main()
+msgctxt = u''
+# Now we are in a msgid section, output previous section
-# vim: set et sts=4 sw=4 :
+elif (l.startswith('msgid') and
+not l.startswith('msgid_plural')):
+if section == STR:
+self.add(msgid, msgstr, fuzzy)
+section = ID
+l = l[5:]
+msgid = msgstr = u''
+is_plural = False
+# This is a message with plural forms
+elif l.startswith('msgid_plural'):
+if section != ID:
+raise PoSyntaxError(
+'msgid_plural not preceeded by '
+'msgid on line %d of po file %s' %
+(lno, repr(self.name)))
+l = l[12:]
+msgid += u'\0'  # separator of singular and plural
+is_plural = True
+# Now we are in a msgstr section
+elif l.startswith('msgstr'):
+section = STR
+if l.startswith('msgstr['):
+if not is_plural:
+raise PoSyntaxError(
+'plural without msgid_plural '
+'on line %d of po file %s' %
+(lno, repr(self.name)))
+l = l.split(']', 1)[1]
+if msgstr:
+# Separator of the various plural forms
+msgstr += u'\0'
+else:
+if is_plural:
+raise PoSyntaxError(
+'indexed msgstr required for '
+'plural on line %d of po file %s' %
+(lno, repr(self.name)))
+l = l[6:]
+# Skip empty lines
+l = l.strip()
+if not l:
+continue
+# TODO: Does this always follow Python escape semantics?
+try:
+l = literal_eval(l)
+except Exception as msg:
+raise PoSyntaxError(
+'%s (line %d of po file %s): \n%s' %
+(msg, lno, repr(self.name), l))
+if isinstance(l, bytes):
+l = l.decode(self.encoding)
+if section == CTXT:
+msgctxt += l
+elif section == ID:
+msgid += l
+elif section == STR:
+msgstr += l
+else:
+raise PoSyntaxError(
+'error on line %d of po file %s' %
+(lno, repr(self.name)))
+# Add last entry
+if section == STR:
+self.add(msgctxt, msgid, msgstr, fuzzy)
+if self.openfile:
+self.po.close()
+def getAsFile(self):
+return BytesIO(self.get())

Mercurial > p > roundup > code

comparison roundup/msgfmt.py @ 5450:f2fade4552c5