changeset 4282:8081d34fefa5

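Fix Issue2550609: in the RDBMS indexer, stop unconditionally decoding the text as UTF-8 before extracting words; only convert it when it is not already a unicode object.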
author Stefan Seefeld <stefan@seefeld.name>
date Tue, 24 Nov 2009 20:12:52 +0000
parents 864746c0cf8d
children fd28b1f291dd
files roundup/backends/indexer_rdbms.py
diffstat 1 files changed, 8 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/roundup/backends/indexer_rdbms.py	Wed Oct 21 09:57:36 2009 +0000
+++ b/roundup/backends/indexer_rdbms.py	Tue Nov 24 20:12:52 2009 +0000
@@ -64,10 +64,14 @@
             self.db.cursor.execute(sql, (id, ))
 
         # ok, find all the unique words in the text
-        text = unicode(text, "utf-8", "replace").upper()
-        wordlist = [w.encode("utf-8")
-            for w in re.findall(r'(?u)\b\w{%d,%d}\b'
-                                % (self.minlength, self.maxlength), text)]
+        def tryencode(str):
+            if not isinstance(str, unicode):
+                str = str.encode("utf-8", "replace")
+            return str
+        text = tryencode(text).upper()
+        wordlist = [tryencode(w)
+                    for w in re.findall(r'(?u)\b\w{%d,%d}\b'
+                                        % (self.minlength, self.maxlength), text)]
         words = set()
         for word in wordlist:
             if self.is_stopword(word): continue

Roundup Issue Tracker: http://roundup-tracker.org/