Mercurial > p > roundup > code

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/roundup/anypy/strings.py	Wed Jul 25 09:05:58 2018 +0000
@@ -0,0 +1,75 @@
+# Roundup represents text internally using the native Python str type.
+# In Python 3, these are Unicode strings.  In Python 2, these are
+# encoded using UTF-8, and the Python 2 unicode type is only used in a
+# few places, generally for interacting with external modules
+# requiring that type to be used.
+
+import sys
+_py3 = sys.version_info[0] > 2  # True when the major Python version is 3+
+
+def b2s(b):
+    """Decode the UTF-8 bytes object *b* into the internal string format."""
+    if not _py3:
+        # Python 2: the internal format is already UTF-8 encoded bytes.
+        return b
+    return b.decode('utf-8')
+
+def s2b(s):
+    """Encode the internal-format string *s* as UTF-8 bytes."""
+    if not _py3:
+        # Python 2: internal strings are UTF-8 encoded already.
+        return s
+    return s.encode('utf-8')
+
+def s2u(s, errors='strict'):
+    """Return the Unicode form of the internal-format string *s*.
+
+    *errors* is the UTF-8 decoding error policy; it is only used on Python 2."""
+    # On Python 3 the internal format is Unicode, so *s* is returned as is.
+    return s if _py3 else unicode(s, 'utf-8', errors)
+
+def u2s(u):
+    """Return the internal-format string for the Unicode string *u*."""
+    if not _py3:
+        # Python 2: internal strings are UTF-8 encoded.
+        return u.encode('utf-8')
+    return u
+
+def us2u(s, errors='strict'):
+    """Return *s*, a string or Unicode string, as a Unicode string.
+
+    *errors* is the UTF-8 decoding error policy; it is only used on
+    Python 2 when *s* is not already a unicode object."""
+    if _py3 or isinstance(s, unicode):
+        # Python 3 input, or a Python 2 unicode object: returned untouched.
+        return s
+    return unicode(s, 'utf-8', errors)
+
+def us2s(u):
+    """Return *u*, a string or Unicode string, in the internal format.
+
+    Only a Python 2 unicode object is converted (encoded as UTF-8);
+    any other value is returned unchanged."""
+    if not _py3 and isinstance(u, unicode):
+        return u.encode('utf-8')
+    # Python 3 input, or a Python 2 non-unicode value: no conversion.
+    return u
+
+def uany2s(u):
+    """Return the internal-format string for *u*, which may be any object.
+
+    On Python 2 a unicode object is encoded as UTF-8.  Every other object,
+    and every object on Python 3, is converted by calling str() on it."""
+    needs_encoding = not _py3 and isinstance(u, unicode)
+    if needs_encoding:
+        # Python 2 unicode object: encode explicitly as UTF-8 rather
+        # than passing it to str().
+        return u.encode('utf-8')
+    return str(u)
+
+def is_us(s):
+    """Return True if *s* is a string or (on Python 2) a Unicode string."""
+    if _py3:
+        return isinstance(s, str)
+    # Python 2 has two string types to accept: str and unicode.
+    return isinstance(s, (str, unicode))
--- a/roundup/backends/back_sqlite.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/backends/back_sqlite.py	Wed Jul 25 09:05:58 2018 +0000
@@ -13,6 +13,7 @@
 from roundup import hyperdb, date, password
 from roundup.backends import rdbms_common
 from roundup.backends.sessions_dbm import Sessions, OneTimeKeys
+from roundup.anypy.strings import uany2s

 sqlite_version = None
 try:
@@ -85,7 +86,7 @@
         hyperdb.Multilink : lambda x: x,    # used in journal marshalling
     }
     sql_to_hyperdb_value = {
-        hyperdb.String : lambda x: isinstance(x, unicode) and x.encode('utf8') or str(x),
+        hyperdb.String : uany2s,
         hyperdb.Date   : lambda x: date.Date(str(x)),
         hyperdb.Link   : str, # XXX numeric ids
         hyperdb.Interval  : date.Interval,
--- a/roundup/backends/indexer_rdbms.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/backends/indexer_rdbms.py	Wed Jul 25 09:05:58 2018 +0000
@@ -5,6 +5,7 @@
 import re

 from roundup.backends.indexer_common import Indexer as IndexerBase
+from roundup.anypy.strings import us2u, u2s

 class Indexer(IndexerBase):
     def __init__(self, db):
@@ -61,10 +62,9 @@
             self.db.cursor.execute(sql, (id, ))

         # ok, find all the unique words in the text
-        if not isinstance(text, unicode):
-            text = unicode(text, "utf-8", "replace")
+        text = us2u(text, "replace")
         text = text.upper()
-        wordlist = [w.encode("utf-8")
+        wordlist = [u2s(w)
                     for w in re.findall(r'(?u)\b\w{%d,%d}\b'
                                         % (self.minlength, self.maxlength), text)]
         words = set()
--- a/roundup/backends/indexer_whoosh.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/backends/indexer_whoosh.py	Wed Jul 25 09:05:58 2018 +0000
@@ -5,6 +5,7 @@
 from whoosh import fields, qparser, index, query, analysis

 from roundup.backends.indexer_common import Indexer as IndexerBase
+from roundup.anypy.strings import us2u

 class Indexer(IndexerBase):
     def __init__(self, db):
@@ -78,8 +79,7 @@
         if not text:
             text = u''

-        if not isinstance(text, unicode):
-            text = unicode(text, "utf-8", "replace")
+        text = us2u(text, "replace")

         # We use the identifier twice: once in the actual "text" being
         # indexed so we can search on it, and again as the "data" being
--- a/roundup/backends/rdbms_common.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/backends/rdbms_common.py	Wed Jul 25 09:05:58 2018 +0000
@@ -69,6 +69,7 @@
 from roundup.date import Range

 from roundup.backends.back_anydbm import compile_expression
+from roundup.anypy.strings import us2s


 # dummy value meaning "argument not passed"
@@ -2944,8 +2945,7 @@
             elif isinstance(prop, hyperdb.Password):
                 value = password.Password(encrypted=value)
             elif isinstance(prop, String):
-                if isinstance(value, unicode):
-                    value = value.encode('utf8')
+                value = us2s(value)
                 if not isinstance(value, str):
                     raise TypeError('new property "%(propname)s" not a '
                         'string: %(value)r'%locals())
--- a/roundup/cgi/PageTemplates/TALES.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/cgi/PageTemplates/TALES.py	Wed Jul 25 09:05:58 2018 +0000
@@ -231,7 +231,7 @@
         text = self.evaluate(expr)
         if text is Default or text is None:
             return text
-        if isinstance(text, unicode):
+        if isinstance(text, type(u'')):
             return text
         else:
             return ustr(text)
--- a/roundup/cgi/TranslationService.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/cgi/TranslationService.py	Wed Jul 25 09:05:58 2018 +0000
@@ -16,13 +16,12 @@
 from roundup import i18n
 from roundup.cgi.PageTemplates import Expressions, PathIterator, TALES
 from roundup.cgi.TAL import TALInterpreter
+from roundup.anypy.strings import us2u, u2s

 ### Translation classes

 class TranslationServiceMixin:

-    OUTPUT_ENCODING = "utf-8"
-
     def translate(self, domain, msgid, mapping=None,
         context=None, target_language=None, default=None
     ):
@@ -32,18 +31,15 @@
         return _msg

     def gettext(self, msgid):
-        if not isinstance(msgid, unicode):
-            msgid = unicode(msgid, 'utf8')
+        msgid = us2u(msgid)
         msgtrans=self.ugettext(msgid)
-        return msgtrans.encode(self.OUTPUT_ENCODING)
+        return u2s(msgtrans)

     def ngettext(self, singular, plural, number):
-        if not isinstance(singular, unicode):
-            singular = unicode(singular, 'utf8')
-        if not isinstance(plural, unicode):
-            plural = unicode(plural, 'utf8')
+        singular = us2u(singular)
+        plural = us2u(plural)
         msgtrans=self.ungettext(singular, plural, number)
-        return msgtrans.encode(self.OUTPUT_ENCODING)
+        return u2s(msgtrans)

 class TranslationService(TranslationServiceMixin, i18n.RoundupTranslations):
     pass
@@ -55,8 +51,7 @@
             return self._fallback.ugettext(message)
         # Sometimes the untranslatable message is a UTF-8 encoded string
         # (thanks to PageTemplate's internals).
-        if not isinstance(message, unicode):
-            return unicode(message, 'utf8')
+        message = us2u(message)
         return message

 ### TAL patching
--- a/roundup/cgi/engine_chameleon.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/cgi/engine_chameleon.py	Wed Jul 25 09:05:58 2018 +0000
@@ -6,6 +6,7 @@
 import chameleon

 from roundup.cgi.templating import StringIO, context, TALLoaderBase
+from roundup.anypy.strings import s2u

 class Loader(TALLoaderBase):
     def __init__(self, dir):
@@ -27,7 +28,7 @@
         def translate(msgid, domain=None, mapping=None, default=None):
             result = client.translator.translate(domain, msgid,
                          mapping=mapping, default=default)
-            return unicode(result, client.translator.OUTPUT_ENCODING)
+            return s2u(result)

         output = self._pt.render(None, translate, **c)
         return output.encode(client.charset)
--- a/roundup/cgi/engine_jinja2.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/cgi/engine_jinja2.py	Wed Jul 25 09:05:58 2018 +0000
@@ -40,6 +40,7 @@
 # http://jinja.pocoo.org/docs/api/#loaders

 from roundup.cgi.templating import context, LoaderBase, TemplateBase
+from roundup.anypy.strings import s2u

 class Jinja2Loader(LoaderBase):
     def __init__(self, dir):
@@ -59,8 +60,7 @@
         # The automatic conversion will assume 'ascii' and fail sometime.
         # Analysed with roundup 1.5.0 and jinja 2.7.1. See issue2550811.
         self._env.filters["u"] = lambda s: \
-            unicode(s(), "utf-8") if type(s) == MethodType \
-                                  else unicode(s, "utf-8")
+            s2u(s()) if type(s) == MethodType else s2u(s)

     def check(self, tplname):
         #print tplname
--- a/roundup/cgi/templating.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/cgi/templating.py	Wed Jul 25 09:05:58 2018 +0000
@@ -29,6 +29,7 @@
 from roundup import hyperdb, date, support
 from roundup import i18n
 from roundup.i18n import _
+from roundup.anypy.strings import is_us, us2s, s2u, u2s

 from .KeywordsExpr import render_keywords_expression_editor

@@ -1774,7 +1775,7 @@
             return self.plain(escape=1)

         value = self._value
-        if isinstance(value, str) or isinstance(value, unicode):
+        if is_us(value):
             value = value.strip().lower() in ('checked', 'yes', 'true',
                 'on', '1')

@@ -1827,8 +1828,7 @@
             anonymous=0, offset=None):
         HTMLProperty.__init__(self, client, classname, nodeid, prop, name,
                 value, anonymous=anonymous)
-        if self._value and not (isinstance(self._value, str) or
-                isinstance(self._value, unicode)):
+        if self._value and not is_us(self._value):
             self._value.setTranslator(self._client.translator)
         self._offset = offset
         if self._offset is None :
@@ -1910,9 +1910,9 @@
                     raise ValueError(self._('default value for '
                         'DateHTMLProperty must be either DateHTMLProperty '
                         'or string date representation.'))
-        elif isinstance(value, str) or isinstance(value, unicode):
+        elif is_us(value):
             # most likely erroneous input to be passed back to user
-            if isinstance(value, unicode): value = value.encode('utf8')
+            value = us2s(value)
             s = self.input(name=self._formname, value=value, size=size,
                               **kwargs)
             if popcal:
@@ -1923,7 +1923,7 @@

         if raw_value is None:
             value = ''
-        elif isinstance(raw_value, str) or isinstance(raw_value, unicode):
+        elif is_us(raw_value):
             if format is self._marker:
                 value = raw_value
             else:
@@ -2012,7 +2012,7 @@
             anonymous=0):
         HTMLProperty.__init__(self, client, classname, nodeid, prop,
             name, value, anonymous)
-        if self._value and not isinstance(self._value, (str, unicode)):
+        if self._value and not is_us(self._value):
             self._value.setTranslator(self._client.translator)

     def plain(self, escape=0):
@@ -2967,9 +2967,9 @@
         klass = self.client.db.getclass(self.classname)
         if self.search_text:
             matches = self.client.db.indexer.search(
-                [w.upper().encode("utf-8", "replace") for w in re.findall(
+                [u2s(w.upper()) for w in re.findall(
                     r'(?u)\b\w{2,25}\b',
-                    unicode(self.search_text, "utf-8", "replace")
+                    s2u(self.search_text, "replace")
                 )], klass)
         else:
             matches = None
--- a/roundup/configuration.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/configuration.py	Wed Jul 25 09:05:58 2018 +0000
@@ -540,8 +540,9 @@
         return value.pattern

     def str2value(self, value):
-        if not isinstance(value, unicode):
+        if not isinstance(value, type(u'')):
             value = str(value)
+        if not isinstance(value, type(u'')):
             # if it is 7-bit ascii, use it as string,
             # otherwise convert to unicode.
             try:
--- a/roundup/dehtml.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/dehtml.py	Wed Jul 25 09:05:58 2018 +0000
@@ -1,5 +1,6 @@

 from __future__ import print_function
+from roundup.anypy.strings import u2s
 class dehtml:
     def __init__(self, converter):
         if converter == "none":
@@ -17,7 +18,7 @@
                     for script in soup(["script", "style"]):
                         script.extract()

-                    return soup.get_text('\n', strip=True).encode('utf-8')
+                    return u2s(soup.get_text('\n', strip=True))

                 self.html2text = html2text
             else:
--- a/roundup/i18n.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/i18n.py	Wed Jul 25 09:05:58 2018 +0000
@@ -40,6 +40,7 @@
 import os

 from roundup import msgfmt
+from roundup.anypy.strings import is_us

 # List of directories for mo file search (see SF bug 1219689)
 LOCALE_DIRS = [
@@ -79,7 +80,7 @@
             if val:
                 languages = val.split(':')
                 break
-    elif isinstance(language, str) or  isinstance(language, unicode):
+    elif is_us(language):
         languages = [language]
     else:
         # 'language' must be iterable
--- a/roundup/mailer.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/mailer.py	Wed Jul 25 09:05:58 2018 +0000
@@ -17,6 +17,7 @@
 from email.mime.multipart import MIMEMultipart

 from roundup.anypy import email_
+from roundup.anypy.strings import s2u

 try:
     import pyme, pyme.core
@@ -85,12 +86,12 @@
         '''
         # encode header values if they need to be
         charset = getattr(self.config, 'EMAIL_CHARSET', 'utf-8')
-        tracker_name = unicode(self.config.TRACKER_NAME, 'utf-8')
+        tracker_name = s2u(self.config.TRACKER_NAME)
         if not author:
             author = (tracker_name, self.config.ADMIN_EMAIL)
             name = author[0]
         else:
-            name = unicode(author[0], 'utf-8')
+            name = s2u(author[0])
         author = nice_sender_header(name, author[1], charset)
         try:
             message['Subject'] = subject.encode('ascii')
--- a/roundup/mailgw.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/mailgw.py	Wed Jul 25 09:05:58 2018 +0000
@@ -97,6 +97,7 @@

 import string, re, os, mimetools, cStringIO, smtplib, socket, binascii, quopri
 import time, random, sys, logging
+import codecs
 import traceback
 import email.utils

@@ -343,7 +344,7 @@
             charset = charset.lower().replace("windows-", 'cp')
             # Do conversion only if charset specified - handle
             # badly-specified charsets
-            edata = unicode(data, charset, 'replace').encode('utf-8')
+            edata = codecs.decode(data, charset, 'replace').encode('utf-8')
             # Convert from dos eol to unix
             edata = edata.replace('\r\n', '\n')
         else:
--- a/roundup/password.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/password.py	Wed Jul 25 09:05:58 2018 +0000
@@ -24,6 +24,8 @@
 from base64 import b64encode, b64decode
 from hashlib import md5, sha1

+from roundup.anypy.strings import us2s, s2b
+
 try:
     import crypt
 except ImportError:
@@ -105,10 +107,8 @@
     :returns:
         raw bytes of generated key
     """
-    if isinstance(password, unicode):
-        password = password.encode("utf-8")
-    if isinstance(salt, unicode):
-        salt = salt.encode("utf-8")
+    password = s2b(us2s(password))
+    salt = s2b(us2s(salt))
     if keylen > 40:
         #NOTE: pbkdf2 allows up to (2**31-1)*20 bytes,
         # but m2crypto has issues on some platforms above 40,
@@ -126,8 +126,7 @@
     """ unpack pbkdf2 encrypted password into parts,
         assume it has format "{rounds}${salt}${digest}
     """
-    if isinstance(pbkdf2, unicode):
-        pbkdf2 = pbkdf2.encode("ascii")
+    pbkdf2 = us2s(pbkdf2)
     try:
         rounds, salt, digest = pbkdf2.split("$")
     except ValueError:
--- a/roundup/roundupdb.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/roundupdb.py	Wed Jul 25 09:05:58 2018 +0000
@@ -39,6 +39,8 @@
 from roundup.mailer import Mailer, MessageSendError, encode_quopri, \
     nice_sender_header

+from roundup.anypy.strings import s2u
+
 try:
     import pyme, pyme.core
     # gpgme_check_version() must have been called once in a programm
@@ -494,7 +496,7 @@
         charset = getattr(self.db.config, 'EMAIL_CHARSET', 'utf-8')

         # construct the content and convert to unicode object
-        body = unicode('\n'.join(m), 'utf-8').encode(charset)
+        body = s2u('\n'.join(m)).encode(charset)

         # make sure the To line is always the same (for testing mostly)
         sendto.sort()
@@ -520,7 +522,7 @@
             sendto = [sendto]

         # tracker sender info
-        tracker_name = unicode(self.db.config.TRACKER_NAME, 'utf-8')
+        tracker_name = s2u(self.db.config.TRACKER_NAME)
         tracker_name = nice_sender_header(tracker_name, from_address,
             charset)
--- a/roundup/xmlrpc.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/roundup/xmlrpc.py	Wed Jul 25 09:05:58 2018 +0000
@@ -12,6 +12,7 @@
 from roundup.anypy import xmlrpc_
 SimpleXMLRPCDispatcher = xmlrpc_.server.SimpleXMLRPCDispatcher
 Binary = xmlrpc_.client.Binary
+from roundup.anypy.strings import us2s
 from traceback import format_exc

 def translate(value):
@@ -41,13 +42,8 @@
             key, value = arg.split('=', 1)
         except ValueError :
             raise UsageError('argument "%s" not propname=value'%arg)
-        if isinstance(key, unicode):
-            try:
-                key = key.encode ('ascii')
-            except UnicodeEncodeError:
-                raise UsageError('argument %r is no valid ascii keyword'%key)
-        if isinstance(value, unicode):
-            value = value.encode('utf-8')
+        key = us2s(key)
+        value = us2s(value)
         if value:
             try:
                 props[key] = hyperdb.rawToHyperdb(db, cl, itemid,
--- a/scripts/import_sf.py	Wed Jul 25 00:40:26 2018 +0000
+++ b/scripts/import_sf.py	Wed Jul 25 09:05:58 2018 +0000
@@ -30,6 +30,7 @@

 from roundup import instance, hyperdb, date, support, password
 from roundup.anypy import http_, urllib_
+from roundup.anypy.strings import s2b, us2s

 today = date.Date('.')

@@ -295,7 +296,7 @@
                     files.append(fid)
                     name = name.strip()
                     try:
-                        f = open(os.path.join(file_dir, fid))
+                        f = open(os.path.join(file_dir, fid), 'rb')
                         content = f.read()
                         f.close()
                     except:
@@ -384,11 +385,11 @@
         if isinstance(klass, hyperdb.FileClass) and entry.get('content'):
             fname = klass.exportFilename('/tmp/imported/', entry['id'])
             support.ensureParentsExist(fname)
-            c = open(fname, 'w')
-            if isinstance(entry['content'], unicode):
-                c.write(entry['content'].encode('utf8'))
+            c = open(fname, 'wb')
+            if isinstance(entry['content'], bytes):
+                c.write(entry['content'])
             else:
-                c.write(entry['content'])
+                c.write(s2b(us2s(entry['content'])))
             c.close()

     f.close()