Mercurial > p > roundup > code
view roundup/msgfmt.py @ 6433:c1d3fbcdbfbd
issue2551142 - Import of retired node ... unique constraint failure.
Title: Import of retired node with username after active node fails
with unique constraint failure.
More fixes needed for mysql and postgresql.
mysql: add unique constraint for (keyvalue, __retired__) when
creating class in the database.
On schema change if class is changed, remove the unique
constraint too.
upgrade version of rdbms database from 5 to 6 to add constraint
to all version 5 databases that were created as version 5
and didn't get the unique constraint. Make no changes
on version 5 databases upgraded from version 4, the upgrade
process to 5 added the constraint. Make no changes
to other databases (sqlite, postgres) during upgrade from
version 5 to 6.
postgres: Handle the exception raised on unique constraint violation.
The exception invalidates the database connection so it
can't be used to recover from the exception.
Added two new database methods:
checkpoint_data - performs a db.commit under postgres
does nothing on other backends
restore_connection_on_error - does a db.rollback on
postgres, does nothing on other
backends
with the rollback() done on the connection I can use the
database connection to fixup the import that failed on the
unique constraint. This makes postgres slower but without the
commit after every imported object, the rollback will delete
all the entries done up to this point.
Trying to figure out how to make the caller do_import batch
and recover from this failure is beyond me.
Also dismissed having to process the export csv file before
importing. Pushing that onto a user just seems wrong. Also
since import/export isn't frequently done the lack of
surprise on having a failing import and reduced
load/frustration for the user seems worth it. Also the import
can be run in verbose mode where it prints out a row as it is
processed, so it may take a while, ut the user can get
feedback.
db_test-base.py: add test for upgrade from 5 to 6.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Thu, 10 Jun 2021 12:52:05 -0400 |
| parents | 4d2e1fa03f0f |
| children | 6a1c1cd69582 |
line wrap: on
line source
#! /usr/bin/env python # -*- coding: iso-8859-1 -*- # Written by Martin v. Loewis <loewis@informatik.hu-berlin.de> # # Changed by Christian 'Tiran' Heimes <tiran@cheimes.de> for the placeless # translation service (PTS) of Zope # # Fixed some bugs and updated to support msgctxt # by Hanno Schlichting <hanno@hannosch.eu> """Generate binary message catalog from textual translation description. This program converts a textual Uniforum-style message catalog (.po file) into a binary GNU catalog (.mo file). This is essentially the same function as the GNU msgfmt program, however, it is a simpler implementation. This file was taken from Python-2.3.2/Tools/i18n and altered in several ways. Now you can simply use it from another python module: from msgfmt import Msgfmt mo = Msgfmt(po).get() where po is path to a po file as string, an opened po file ready for reading or a list of strings (readlines of a po file) and mo is the compiled mo file as binary string. Exceptions: * IOError if the file couldn't be read * msgfmt.PoSyntaxError if the po file has syntax errors """ import array from ast import literal_eval import codecs from email.parser import HeaderParser import struct import sys PY3 = sys.version_info[0] == 3 if PY3: def header_charset(s): p = HeaderParser() return p.parsestr(s).get_content_charset() import io BytesIO = io.BytesIO FILE_TYPE = io.IOBase else: def header_charset(s): p = HeaderParser() return p.parsestr(s.encode('utf-8', 'ignore')).get_content_charset() from cStringIO import StringIO as BytesIO # file is a type defined only under python 2. # Flake8 when run in py3 flags this. FILE_TYPE = file # noqa: 821 class PoSyntaxError(Exception): """ Syntax error in a po file """ def __init__(self, msg): self.msg = msg def __str__(self): return 'Po file syntax error: %s' % self.msg class Msgfmt: def __init__(self, po, name='unknown'): self.po = po self.name = name self.messages = {} self.openfile = False # Start off assuming latin-1, so everything decodes without failure, # until we know the exact encoding self.encoding = 'latin-1' def readPoData(self): """ read po data from self.po and return an iterator """ output = [] if isinstance(self.po, str): output = open(self.po, 'rb') elif isinstance(self.po, FILE_TYPE): self.po.seek(0) self.openfile = True output = self.po elif isinstance(self.po, list): output = self.po if not output: raise ValueError("self.po is invalid! %s" % type(self.po)) if isinstance(output, FILE_TYPE): # remove BOM from the start of the parsed input first = output.readline() if len(first) == 0: return output.readlines() if first.startswith(codecs.BOM_UTF8): first = first.lstrip(codecs.BOM_UTF8) return [first] + output.readlines() return output def add(self, context, id, string, fuzzy): "Add a non-empty and non-fuzzy translation to the dictionary." if string and not fuzzy: # The context is put before the id and separated by a EOT char. if context: id = context + u'\x04' + id if not id: # See whether there is an encoding declaration charset = header_charset(string) if charset: # decode header in proper encoding string = string.encode(self.encoding).decode(charset) if not PY3: # undo damage done by literal_eval in Python 2.x string = string.encode(self.encoding).decode(charset) self.encoding = charset self.messages[id] = string def generate(self): "Return the generated output." # the keys are sorted in the .mo file keys = sorted(self.messages.keys()) offsets = [] ids = strs = b'' for id in keys: msg = self.messages[id].encode(self.encoding) id = id.encode(self.encoding) # For each string, we need size and file offset. Each string is # NUL terminated; the NUL does not count into the size. offsets.append((len(ids), len(id), len(strs), len(msg))) ids += id + b'\0' strs += msg + b'\0' output = b'' # The header is 7 32-bit unsigned integers. We don't use hash tables, # so the keys start right after the index tables. keystart = 7 * 4 + 16 * len(keys) # and the values start after the keys valuestart = keystart + len(ids) koffsets = [] voffsets = [] # The string table first has the list of keys, then the list of values. # Each entry has first the size of the string, then the file offset. for o1, l1, o2, l2 in offsets: koffsets += [l1, o1 + keystart] voffsets += [l2, o2 + valuestart] offsets = koffsets + voffsets # Even though we don't use a hashtable, we still set its offset to be # binary compatible with the gnu gettext format produced by: # msgfmt file.po --no-hash output = struct.pack("Iiiiiii", 0x950412de, # Magic 0, # Version len(keys), # # of entries 7 * 4, # start of key index 7 * 4 + len(keys) * 8, # start of value index 0, keystart) # size and offset of hash table if PY3: output += array.array("i", offsets).tobytes() else: output += array.array("i", offsets).tostring() output += ids output += strs return output def get(self): """ """ self.read() # Compute output return self.generate() def read(self, header_only=False): """ """ ID = 1 STR = 2 CTXT = 3 section = None fuzzy = 0 msgid = msgstr = msgctxt = u'' # Parse the catalog lno = 0 for l in self.readPoData(): l = l.decode(self.encoding) lno += 1 # If we get a comment line after a msgstr or a line starting with # msgid or msgctxt, this is a new entry if section == STR and (l[0] == '#' or (l[0] == 'm' and (l.startswith('msgctxt') or l.startswith('msgid')))): self.add(msgctxt, msgid, msgstr, fuzzy) section = None fuzzy = 0 # If we only want the header we stop after the first message if header_only: break # Record a fuzzy mark if l[:2] == '#,' and 'fuzzy' in l: fuzzy = 1 # Skip comments if l[0] == '#': continue # Now we are in a msgctxt section if l.startswith('msgctxt'): section = CTXT l = l[7:] msgctxt = u'' # Now we are in a msgid section, output previous section elif (l.startswith('msgid') and not l.startswith('msgid_plural')): if section == STR: self.add(msgid, msgstr, fuzzy) section = ID l = l[5:] msgid = msgstr = u'' is_plural = False # This is a message with plural forms elif l.startswith('msgid_plural'): if section != ID: raise PoSyntaxError( 'msgid_plural not preceeded by ' 'msgid on line %d of po file %s' % (lno, repr(self.name))) l = l[12:] msgid += u'\0' # separator of singular and plural is_plural = True # Now we are in a msgstr section elif l.startswith('msgstr'): section = STR if l.startswith('msgstr['): if not is_plural: raise PoSyntaxError( 'plural without msgid_plural ' 'on line %d of po file %s' % (lno, repr(self.name))) l = l.split(']', 1)[1] if msgstr: # Separator of the various plural forms msgstr += u'\0' else: if is_plural: raise PoSyntaxError( 'indexed msgstr required for ' 'plural on line %d of po file %s' % (lno, repr(self.name))) l = l[6:] # Skip empty lines l = l.strip() if not l: continue # TODO: Does this always follow Python escape semantics? try: l = literal_eval(l) except Exception as msg: raise PoSyntaxError( '%s (line %d of po file %s): \n%s' % (msg, lno, repr(self.name), l)) if isinstance(l, bytes): l = l.decode(self.encoding) if section == CTXT: msgctxt += l elif section == ID: msgid += l elif section == STR: msgstr += l else: raise PoSyntaxError( 'error on line %d of po file %s' % (lno, repr(self.name))) # Add last entry if section == STR: self.add(msgctxt, msgid, msgstr, fuzzy) if self.openfile: self.po.close() def getAsFile(self): return BytesIO(self.get())
