diff roundup/backends/indexer_rdbms.py @ 2872:d530b68e4b42

Don't index common words (stop words) [SF#1046612]
author Richard Jones <richard@users.sourceforge.net>
date Fri, 05 Nov 2004 05:10:07 +0000
parents 18addf2a8596
children f8d0fd056ac0
line wrap: on
line diff
--- a/roundup/backends/indexer_rdbms.py	Fri Nov 05 04:55:52 2004 +0000
+++ b/roundup/backends/indexer_rdbms.py	Fri Nov 05 05:10:07 2004 +0000
@@ -4,10 +4,9 @@
 '''
 import re
 
-from indexer_dbm import Indexer
+from indexer_dbm import Indexer, is_stopword
 
 class Indexer(Indexer):
-    disallows = {'THE':1, 'THIS':1, 'ZZZ':1, 'THAT':1, 'WITH':1}
     def __init__(self, db):
         self.db = db
         self.reindex = 0
@@ -55,8 +54,9 @@
         wordlist = re.findall(r'\b\w{2,25}\b', str(text).upper())
         words = {}
         for word in wordlist:
-            if not self.disallows.has_key(word):
-                words[word] = 1
+            if is_stopword(word):
+                continue
+            words[word] = 1
         words = words.keys()
 
         # for each word, add an entry in the db

Roundup Issue Tracker: http://roundup-tracker.org/