Mercurial > p > roundup > code
diff roundup/backends/indexer_common.py @ 3544:5cd1c83dea50
Features and fixes.
Feature:
- trackers may configure custom stop-words for the full-text indexer
Fixed:
- fixes in scripts/import_sf.py
- fix some unicode bugs in roundup-admin import
- Xapian indexer wasn't actually being used
- fix indexing of message content on roundup-admin import
| author | Richard Jones <richard@users.sourceforge.net> |
|---|---|
| date | Mon, 06 Feb 2006 21:00:47 +0000 |
| parents | a8c2371f45b6 |
| children | 5f4db2650da3 |
line wrap: on
line diff
```diff
--- a/roundup/backends/indexer_common.py Mon Feb 06 02:35:47 2006 +0000
+++ b/roundup/backends/indexer_common.py Mon Feb 06 21:00:47 2006 +0000
@@ -1,26 +1,29 @@
-#$Id: indexer_common.py,v 1.4 2005-01-08 16:16:59 jlgijsbers Exp $
-import re
+#$Id: indexer_common.py,v 1.5 2006-02-06 21:00:47 richard Exp $
+import re, sets
 
 from roundup import hyperdb
 
-stopwords = [
-"A", "AND", "ARE", "AS", "AT", "BE", "BUT", "BY",
-"FOR", "IF", "IN", "INTO", "IS", "IT",
-"NO", "NOT", "OF", "ON", "OR", "SUCH",
-"THAT", "THE", "THEIR", "THEN", "THERE", "THESE",
-"THEY", "THIS", "TO", "WAS", "WILL", "WITH"
+STOPWORDS = [
+    "A", "AND", "ARE", "AS", "AT", "BE", "BUT", "BY",
+    "FOR", "IF", "IN", "INTO", "IS", "IT",
+    "NO", "NOT", "OF", "ON", "OR", "SUCH",
+    "THAT", "THE", "THEIR", "THEN", "THERE", "THESE",
+    "THEY", "THIS", "TO", "WAS", "WILL", "WITH"
 ]
 
-is_stopword = {}
-for word in stopwords:
-    is_stopword[word] = None
-is_stopword = is_stopword.has_key
-
 def _isLink(propclass):
     return (isinstance(propclass, hyperdb.Link) or
             isinstance(propclass, hyperdb.Multilink))
 
 class Indexer:
+    def __init__(self, db):
+        self.stopwords = sets.Set(STOPWORDS)
+        for word in db.config[('main', 'indexer_stopwords')]:
+            self.stopwords.add(word)
+
+    def is_stopword(self, word):
+        return word in self.stopwords
+
     def getHits(self, search_terms, klass):
         return self.find(search_terms)
 
```
