diff roundup/backends/indexer_common.py @ 3544:5cd1c83dea50

Features and fixes.

Feature:
- trackers may configure custom stop-words for the full-text indexer

Fixed:
- fixes in scripts/import_sf.py
- fix some unicode bugs in roundup-admin import
- Xapian indexer wasn't actually being used
- fix indexing of message content on roundup-admin import
author Richard Jones <richard@users.sourceforge.net>
date Mon, 06 Feb 2006 21:00:47 +0000
parents a8c2371f45b6
children 5f4db2650da3
line wrap: on
line diff
--- a/roundup/backends/indexer_common.py	Mon Feb 06 02:35:47 2006 +0000
+++ b/roundup/backends/indexer_common.py	Mon Feb 06 21:00:47 2006 +0000
@@ -1,26 +1,29 @@
-#$Id: indexer_common.py,v 1.4 2005-01-08 16:16:59 jlgijsbers Exp $
-import re
+#$Id: indexer_common.py,v 1.5 2006-02-06 21:00:47 richard Exp $
+import re, sets
 
 from roundup import hyperdb
 
-stopwords = [
-"A", "AND", "ARE", "AS", "AT", "BE", "BUT", "BY",
-"FOR", "IF", "IN", "INTO", "IS", "IT",
-"NO", "NOT", "OF", "ON", "OR", "SUCH",
-"THAT", "THE", "THEIR", "THEN", "THERE", "THESE",
-"THEY", "THIS", "TO", "WAS", "WILL", "WITH" 
+STOPWORDS = [
+    "A", "AND", "ARE", "AS", "AT", "BE", "BUT", "BY",
+    "FOR", "IF", "IN", "INTO", "IS", "IT",
+    "NO", "NOT", "OF", "ON", "OR", "SUCH",
+    "THAT", "THE", "THEIR", "THEN", "THERE", "THESE",
+    "THEY", "THIS", "TO", "WAS", "WILL", "WITH" 
 ]
 
-is_stopword = {}
-for word in stopwords:
-    is_stopword[word] = None
-is_stopword = is_stopword.has_key
-
 def _isLink(propclass):
     return (isinstance(propclass, hyperdb.Link) or
             isinstance(propclass, hyperdb.Multilink))
 
 class Indexer:    
+    def __init__(self, db):
+        self.stopwords = sets.Set(STOPWORDS)
+        for word in db.config[('main', 'indexer_stopwords')]:
+            self.stopwords.add(word)
+
+    def is_stopword(self, word):
+        return word in self.stopwords
+
     def getHits(self, search_terms, klass):
         return self.find(search_terms)
     

Roundup Issue Tracker: http://roundup-tracker.org/