Mercurial > p > roundup > code
diff roundup/backends/indexer_dbm.py @ 2872:d530b68e4b42
don't index common words [SF#1046612]
| author | Richard Jones <richard@users.sourceforge.net> |
|---|---|
| date | Fri, 05 Nov 2004 05:10:07 +0000 |
| parents | 93f03c6714d8 |
| children | 1c063814d567 |
line wrap: on
line diff
```diff
--- a/roundup/backends/indexer_dbm.py Fri Nov 05 04:55:52 2004 +0000
+++ b/roundup/backends/indexer_dbm.py Fri Nov 05 05:10:07 2004 +0000
@@ -14,7 +14,7 @@
 # that promote freedom, but obviously am giving up any rights
 # to compel such.
 #
-#$Id: indexer_dbm.py,v 1.1 2004-03-19 04:47:59 richard Exp $
+#$Id: indexer_dbm.py,v 1.2 2004-11-05 05:10:07 richard Exp $
 '''This module provides an indexer class, RoundupIndexer, that stores text
 indices in a roundup instance. This class makes searching the content of
 messages, string properties and text files possible.
@@ -24,6 +24,18 @@
 import os, shutil, re, mimetypes, marshal, zlib, errno
 from roundup.hyperdb import Link, Multilink
 
+stopwords = [
+"A", "AND", "ARE", "AS", "AT", "BE", "BUT", "BY",
+"FOR", "IF", "IN", "INTO", "IS", "IT",
+"NO", "NOT", "OF", "ON", "OR", "SUCH",
+"THAT", "THE", "THEIR", "THEN", "THERE", "THESE",
+"THEY", "THIS", "TO", "WAS", "WILL", "WITH"
+]
+is_stopword = {}
+for word in stopwords:
+    is_stopword[word] = None
+is_stopword = is_stopword.has_key
+
 class Indexer:
     '''Indexes information from roundup's hyperdb to allow efficient
     searching.
@@ -95,6 +107,8 @@
         # find the unique words
         filedict = {}
         for word in words:
+            if is_stopword(word):
+                continue
             if filedict.has_key(word):
                 filedict[word] = filedict[word]+1
             else:
```
