Mercurial > p > roundup > code
diff roundup/backends/indexer_rdbms.py @ 2872:d530b68e4b42
don't index common words [SF#1046612]
| author | Richard Jones <richard@users.sourceforge.net> |
|---|---|
| date | Fri, 05 Nov 2004 05:10:07 +0000 |
| parents | 18addf2a8596 |
| children | f8d0fd056ac0 |
line wrap: on
line diff
--- a/roundup/backends/indexer_rdbms.py Fri Nov 05 04:55:52 2004 +0000
+++ b/roundup/backends/indexer_rdbms.py Fri Nov 05 05:10:07 2004 +0000
@@ -4,10 +4,9 @@
 '''
 import re
 
-from indexer_dbm import Indexer
+from indexer_dbm import Indexer, is_stopword
 
 class Indexer(Indexer):
-    disallows = {'THE':1, 'THIS':1, 'ZZZ':1, 'THAT':1, 'WITH':1}
     def __init__(self, db):
         self.db = db
         self.reindex = 0
@@ -55,8 +54,9 @@
         wordlist = re.findall(r'\b\w{2,25}\b', str(text).upper())
         words = {}
         for word in wordlist:
-            if not self.disallows.has_key(word):
-                words[word] = 1
+            if is_stopword(word):
+                continue
+            words[word] = 1
         words = words.keys()
 
         # for each word, add an entry in the db
