changeset 5525:bb7865241f8a

Make CSV import/export compatible across Python versions (also RDBMS journals) (issue 2550976, issue 2550975). The roundup-admin export and import commands are used for migrating between different database backends. It is desirable that they should also be usable for migrations between Python 2 and Python 3, and in some cases (e.g. with the anydbm backend) this may be required. To be usable for such migrations, the format of the generated CSV files needs to be stable, meaning the same as currently used with Python 2. The export process uses repr() to produce the fields in the CSV files and eval() to convert them back to Python data structures. repr() of strings with non-ASCII characters produces different results for Python 2 and Python 3. This patch adds repr_export and eval_import functions to roundup/anypy/strings.py which provide the required operations: these are just repr() and eval() in Python 2, but are more complicated in Python 3 so as to use data representations compatible with Python 2. These functions are then used in the required places for export and import. repr() and eval() are also used in storing the dict of changed values in the journal for the RDBMS backends. It is similarly desirable that the database be compatible between Python 2 and Python 3, so that export and import do not need to be used for a migration between Python versions for non-anydbm backends. Thus, this patch changes rdbms_common.py in the places involved in storing journals in the database, not just in those involved in import/export. Given this patch, import/export with non-ASCII characters appears, based on some limited testing, to work across Python versions, and an instance using the sqlite backend appears to be compatible between Python versions without needing import/export, *if* the sessions/otks databases (which use anydbm) are deleted when changing Python version.
author Joseph Myers <jsm@polyomino.org.uk>
date Sun, 02 Sep 2018 23:48:04 +0000
parents 674ad58667b4
children df3f553fa414
files roundup/admin.py roundup/anypy/strings.py roundup/backends/back_anydbm.py roundup/backends/rdbms_common.py roundup/hyperdb.py
diffstat 5 files changed, 73 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/roundup/admin.py	Sun Sep 02 18:59:41 2018 +0000
+++ b/roundup/admin.py	Sun Sep 02 23:48:04 2018 +0000
@@ -32,6 +32,7 @@
 from roundup.i18n import _
 from roundup.exceptions import UsageError
 from roundup.anypy.my_input import my_input
+from roundup.anypy.strings import repr_export
 
 try:
     from UserDict import UserDict
@@ -1201,13 +1202,13 @@
                     sys.stdout.flush()
                 node = cl.getnode(nodeid)
                 exp = cl.export_list(propnames, nodeid)
-                lensum = sum ([len (repr(node[p])) for p in propnames])
+                lensum = sum ([len (repr_export(node[p])) for p in propnames])
                 # for a safe upper bound of field length we add
                 # difference between CSV len and sum of all field lengths
                 d = sum ([len(x) for x in exp]) - lensum
                 assert (d > 0)
                 for p in propnames:
-                    ll = len(repr(node[p])) + d
+                    ll = len(repr_export(node[p])) + d
                     if ll > max_len:
                         max_len = ll
                 writer.writerow(exp)
--- a/roundup/anypy/strings.py	Sun Sep 02 18:59:41 2018 +0000
+++ b/roundup/anypy/strings.py	Sun Sep 02 23:48:04 2018 +0000
@@ -96,3 +96,51 @@
         return chr(c)
     else:
         return unichr(c)
+
+# CSV files used for export and import represent strings in the style
+# used by repr in Python 2; this means that each byte of the UTF-8
+# representation is represented by a \x escape if not a printable
+# ASCII character.  When such a representation is interpreted by eval
+# in Python 3, the effect is that the Unicode characters in the
+# resulting string correspond to UTF-8 bytes, so encoding the string
+# as ISO-8859-1 produces the correct byte-string which must then be
+# decoded as UTF-8 to produce the correct Unicode string.  The same
+# representations are also used for journal storage in RDBMS
+# databases, so that the database can be compatible between Python 2
+# and Python 3.
+
+def repr_export(v):
+    """Return a Python-2-style representation of a value for export to CSV."""
+    if _py3:  # _py3 is defined elsewhere in this module; presumably true only on Python 3
+        if isinstance(v, str):
+            return repr(s2b(v))[1:]  # drop the first character of the repr (presumably the b prefix; assumes s2b returns bytes)
+        elif isinstance(v, dict):  # dicts are rendered by hand so keys and values also pass through repr_export
+            repr_vals = []
+            for key, value in sorted(v.items()):  # sorted items give a deterministic field order
+                repr_vals.append('%s: %s' % (repr_export(key),
+                                             repr_export(value)))
+            return '{%s}' % ', '.join(repr_vals)
+        else:
+            return repr(v)  # every other type (lists and tuples included) uses the native repr; strings nested inside are not converted
+    else:
+        return repr(v)  # non-_py3 branch: the native repr is used unchanged
+
+def eval_import(s):
+    """Evaluate a Python-2-style value imported from a CSV file."""
+    if _py3:  # _py3 is defined elsewhere in this module; presumably true only on Python 3
+        v = eval(s)  # NOTE(review): eval runs arbitrary expressions; only feed it trusted export/journal data
+        if isinstance(v, str):
+            return v.encode('iso-8859-1').decode('utf-8')  # code points -> same-valued bytes -> decode those bytes as UTF-8
+        elif isinstance(v, dict):
+            v_mod = {}  # rebuilt dict; only top-level str keys and values are re-decoded
+            for key, value in v.items():
+                if isinstance(key, str):
+                    key = key.encode('iso-8859-1').decode('utf-8')
+                if isinstance(value, str):
+                    value = value.encode('iso-8859-1').decode('utf-8')
+                v_mod[key] = value  # non-str values (e.g. nested lists, tuples or dicts) are stored as evaluated
+            return v_mod
+        else:
+            return v  # results that are neither str nor dict are returned exactly as evaluated
+    else:
+        return eval(s)  # non-_py3 branch: plain eval, no re-decoding
--- a/roundup/backends/back_anydbm.py	Sun Sep 02 18:59:41 2018 +0000
+++ b/roundup/backends/back_anydbm.py	Sun Sep 02 23:48:04 2018 +0000
@@ -25,7 +25,7 @@
 import os, marshal, re, weakref, string, copy, time, shutil, logging
 
 from roundup.anypy.dbm_ import anydbm, whichdb
-from roundup.anypy.strings import b2s, bs2b
+from roundup.anypy.strings import b2s, bs2b, repr_export, eval_import
 
 from roundup import hyperdb, date, password, roundupdb, security, support
 from roundup.backends import locking
@@ -2031,10 +2031,10 @@
                 value = value.get_tuple()
             elif isinstance(proptype, hyperdb.Password):
                 value = str(value)
-            l.append(repr(value))
+            l.append(repr_export(value))
 
         # append retired flag
-        l.append(repr(self.is_retired(nodeid)))
+        l.append(repr_export(self.is_retired(nodeid)))
 
         return l
 
@@ -2057,8 +2057,9 @@
             # Figure the property for this column
             propname = propnames[i]
 
-            # Use eval to reverse the repr() used to output the CSV
-            value = eval(proplist[i])
+            # Use eval_import to reverse the repr_export() used to
+            # output the CSV
+            value = eval_import(proplist[i])
 
             # "unmarshal" where necessary
             if propname == 'id':
@@ -2127,8 +2128,9 @@
                             value = str(value)
                         export_data[propname] = value
                     params = export_data
-                r.append([repr(nodeid), repr(date), repr(user),
-                    repr(action), repr(params)])
+                r.append([repr_export(nodeid), repr_export(date),
+                          repr_export(user), repr_export(action),
+                          repr_export(params)])
         return r
 
 class FileClass(hyperdb.FileClass, Class):
--- a/roundup/backends/rdbms_common.py	Sun Sep 02 18:59:41 2018 +0000
+++ b/roundup/backends/rdbms_common.py	Sun Sep 02 23:48:04 2018 +0000
@@ -69,7 +69,7 @@
 from roundup.date import Range
 
 from roundup.backends.back_anydbm import compile_expression
-from roundup.anypy.strings import b2s, bs2b, us2s
+from roundup.anypy.strings import b2s, bs2b, us2s, repr_export, eval_import
 
 
 # dummy value meaning "argument not passed"
@@ -1302,7 +1302,7 @@
         if isinstance(params, type({})):
             self._journal_marshal(params, classname)
 
-        params = repr(params)
+        params = repr_export(params)
 
         dc = self.to_sql_value(hyperdb.Date)
         journaldate = dc(journaldate)
@@ -1328,7 +1328,7 @@
             # make the journalled data marshallable
             if isinstance(params, type({})):
                 self._journal_marshal(params, classname)
-            params = repr(params)
+            params = repr_export(params)
 
             self.save_journal(classname, cols, nodeid, dc(journaldate),
                 journaltag, action, params)
@@ -1366,7 +1366,7 @@
         res = []
         properties = self.getclass(classname).getprops()
         for nodeid, date_stamp, user, action, params in journal:
-            params = eval(params)
+            params = eval_import(params)
             if isinstance(params, type({})):
                 for param, value in params.items():
                     if not value:
@@ -2892,8 +2892,8 @@
                 value = value.get_tuple()
             elif isinstance(proptype, hyperdb.Password):
                 value = str(value)
-            l.append(repr(value))
-        l.append(repr(self.is_retired(nodeid)))
+            l.append(repr_export(value))
+        l.append(repr_export(self.is_retired(nodeid)))
         return l
 
     def import_list(self, propnames, proplist):
@@ -2914,10 +2914,11 @@
         if not "id" in propnames:
             newid = self.db.newid(self.classname)
         else:
-            newid = eval(proplist[propnames.index("id")])
+            newid = eval_import(proplist[propnames.index("id")])
         for i in range(len(propnames)):
-            # Use eval to reverse the repr() used to output the CSV
-            value = eval(proplist[i])
+            # Use eval_import to reverse the repr_export() used to
+            # output the CSV
+            value = eval_import(proplist[i])
 
             # Figure the property for this column
             propname = propnames[i]
@@ -3010,7 +3011,7 @@
                     # old tracker with data stored in the create!
                     params = {}
                 l = [nodeid, date, user, action, params]
-                r.append(list(map(repr, l)))
+                r.append(list(map(repr_export, l)))
         return r
 
 class FileClass(hyperdb.FileClass, Class):
--- a/roundup/hyperdb.py	Sun Sep 02 18:59:41 2018 +0000
+++ b/roundup/hyperdb.py	Sun Sep 02 23:48:04 2018 +0000
@@ -31,6 +31,7 @@
 from roundup.i18n import _
 from roundup.cgi.exceptions import DetectorError
 from roundup.anypy.cmp_ import NoneAndDictComparable
+from roundup.anypy.strings import eval_import
 
 logger = logging.getLogger('roundup.hyperdb')
 
@@ -1498,7 +1499,7 @@
         last = 0
         r = []
         for n, l in a:
-            nodeid, jdate, user, action, params = map(eval, l)
+            nodeid, jdate, user, action, params = map(eval_import, l)
             assert (str(n) == nodeid)
             if n != last:
                 if r:

Roundup Issue Tracker: http://roundup-tracker.org/