Mercurial > p > roundup > code
changeset 4282:8081d34fefa5
Fix Issue2550609.
| author | Stefan Seefeld <stefan@seefeld.name> |
|---|---|
| date | Tue, 24 Nov 2009 20:12:52 +0000 |
| parents | 864746c0cf8d |
| children | fd28b1f291dd |
| files | roundup/backends/indexer_rdbms.py |
| diffstat | 1 files changed, 8 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/roundup/backends/indexer_rdbms.py Wed Oct 21 09:57:36 2009 +0000
+++ b/roundup/backends/indexer_rdbms.py Tue Nov 24 20:12:52 2009 +0000
@@ -64,10 +64,14 @@
             self.db.cursor.execute(sql, (id, ))
 
         # ok, find all the unique words in the text
-        text = unicode(text, "utf-8", "replace").upper()
-        wordlist = [w.encode("utf-8")
-                    for w in re.findall(r'(?u)\b\w{%d,%d}\b'
-                                        % (self.minlength, self.maxlength), text)]
+        def tryencode(str):
+            if not isinstance(str, unicode):
+                str = str.encode("utf-8", "replace")
+            return str
+        text = tryencode(text).upper()
+        wordlist = [tryencode(w)
+                    for w in re.findall(r'(?u)\b\w{%d,%d}\b'
+                                        % (self.minlength, self.maxlength), text)]
         words = set()
         for word in wordlist:
             if self.is_stopword(word): continue
