Mercurial > p > roundup > code
view roundup/msgfmt.py @ 5096:e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
Also adds new config.ini setting called indexer to select
indexer. See ``doc/upgrading.txt`` for details. Initial patch
done by David Wolever. Patch modified (see ticket or below for
changes), docs updated and committed.
I have an outstanding issue with test/test_indexer.py. I have to
comment out all imports and tests for indexers I don't have (i.e.
mysql, postgres) otherwise no tests run.
With that change made, dbm, sqlite (rdbms), xapian and whoosh indexes
are all passing the indexer tests.
Changes summary:
1) support native back ends dbm and rdbms. (original patch only fell
through to dbm)
2) Developed whoosh stopfilter to not index stopwords or words outside
the the maxlength and minlength limits defined in index_common.py.
Required to pass the extremewords test_indexer test. Also I
removed a call to .lower on the input text as the tokenizer I chose
automatically does the lowercase.
3) Added support for max/min length to find. This was needed to pass
extremewords test.
4) Added back a call to save_index in add_text. This allowed all but
two tests to pass.
5) Fixed a call to:
results = searcher.search(query.Term("identifier", identifier))
which had an extra parameter that is an error under current whoosh.
6) Set limit=None in search call for find() otherwise it only return
10 items. This allowed it to pass manyresults test
Also due to changes in the roundup code removed the call in
indexer_whoosh to
from roundup.anypy.sets_ import set
since we use the python builtin set.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Sat, 25 Jun 2016 20:10:03 -0400 |
| parents | 7000070fe600 |
| children | 4498f5252f8b c643d693d91e |
line wrap: on
line source
#! /usr/bin/env python # -*- coding: iso-8859-1 -*- # Written by Martin v. Löwis <loewis@informatik.hu-berlin.de> # Plural forms support added by alexander smishlajev <alex@tycobka.lv> """Generate binary message catalog from textual translation description. This program converts a textual Uniforum-style message catalog (.po file) into a binary GNU catalog (.mo file). This is essentially the same function as the GNU msgfmt program, however, it is a simpler implementation. Usage: msgfmt.py [OPTIONS] filename.po Options: -o file --output-file=file Specify the output file to write to. If omitted, output will go to a file named filename.mo (based off the input file name). -h --help Print this message and exit. -V --version Display version information and exit. """ import sys import os import getopt import struct import array __version__ = "1.1" MESSAGES = {} def usage(code, msg=''): print >> sys.stderr, __doc__ if msg: print >> sys.stderr, msg sys.exit(code) def add(id, str, fuzzy): "Add a non-fuzzy translation to the dictionary." global MESSAGES if not fuzzy and str and not str.startswith('\0'): MESSAGES[id] = str def generate(): "Return the generated output." global MESSAGES keys = MESSAGES.keys() # the keys are sorted in the .mo file keys.sort() offsets = [] ids = strs = '' for id in keys: # For each string, we need size and file offset. Each string is NUL # terminated; the NUL does not count into the size. offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id]))) ids += id + '\0' strs += MESSAGES[id] + '\0' output = '' # The header is 7 32-bit unsigned integers. We don't use hash tables, so # the keys start right after the index tables. # translated string. keystart = 7*4+16*len(keys) # and the values start after the keys valuestart = keystart + len(ids) koffsets = [] voffsets = [] # The string table first has the list of keys, then the list of values. # Each entry has first the size of the string, then the file offset. for o1, l1, o2, l2 in offsets: koffsets += [l1, o1+keystart] voffsets += [l2, o2+valuestart] offsets = koffsets + voffsets output = struct.pack("Iiiiiii", 0x950412deL, # Magic 0, # Version len(keys), # # of entries 7*4, # start of key index 7*4+len(keys)*8, # start of value index 0, 0) # size and offset of hash table output += array.array("i", offsets).tostring() output += ids output += strs return output def make(filename, outfile): ID = 1 STR = 2 global MESSAGES MESSAGES = {} # Compute .mo name from .po name and arguments if filename.endswith('.po'): infile = filename else: infile = filename + '.po' if outfile is None: outfile = os.path.splitext(infile)[0] + '.mo' try: lines = open(infile).readlines() except IOError, msg: print >> sys.stderr, msg sys.exit(1) # remove UTF-8 Byte Order Mark, if any. # (UCS2 BOMs are not handled because messages in UCS2 cannot be handled) if lines[0].startswith('\xEF\xBB\xBF'): lines[0] = lines[0][3:] section = None fuzzy = 0 # Parse the catalog lno = 0 for l in lines: lno += 1 # If we get a comment line after a msgstr, this is a new entry if l[0] == '#' and section == STR: add(msgid, msgstr, fuzzy) section = None fuzzy = 0 # Record a fuzzy mark if l[:2] == '#,' and (l.find('fuzzy') >= 0): fuzzy = 1 # Skip comments if l[0] == '#': continue # Start of msgid_plural section, separate from singular form with \0 if l.startswith('msgid_plural'): msgid += '\0' l = l[12:] # Now we are in a msgid section, output previous section elif l.startswith('msgid'): if section == STR: add(msgid, msgstr, fuzzy) section = ID l = l[5:] msgid = msgstr = '' # Now we are in a msgstr section elif l.startswith('msgstr'): section = STR l = l[6:] # Check for plural forms if l.startswith('['): # Separate plural forms with \0 if not l.startswith('[0]'): msgstr += '\0' # Ignore the index - must come in sequence l = l[l.index(']') + 1:] # Skip empty lines l = l.strip() if not l: continue # XXX: Does this always follow Python escape semantics? l = eval(l) if section == ID: msgid += l elif section == STR: msgstr += l else: print >> sys.stderr, 'Syntax error on %s:%d' % (infile, lno), \ 'before:' print >> sys.stderr, l sys.exit(1) # Add last entry if section == STR: add(msgid, msgstr, fuzzy) # Compute output output = generate() try: open(outfile,"wb").write(output) except IOError,msg: print >> sys.stderr, msg def main(): try: opts, args = getopt.getopt(sys.argv[1:], 'hVo:', ['help', 'version', 'output-file=']) except getopt.error, msg: usage(1, msg) outfile = None # parse options for opt, arg in opts: if opt in ('-h', '--help'): usage(0) elif opt in ('-V', '--version'): print >> sys.stderr, "msgfmt.py", __version__ sys.exit(0) elif opt in ('-o', '--output-file'): outfile = arg # do it if not args: print >> sys.stderr, 'No input file given' print >> sys.stderr, "Try `msgfmt --help' for more information." return for filename in args: make(filename, outfile) if __name__ == '__main__': main() # vim: set et sts=4 sw=4 :
