diff roundup/backends/indexer_dbm.py @ 2872:d530b68e4b42

don't index common words [SF#1046612]
author Richard Jones <richard@users.sourceforge.net>
date Fri, 05 Nov 2004 05:10:07 +0000
parents 93f03c6714d8
children 1c063814d567
line wrap: on
line diff
--- a/roundup/backends/indexer_dbm.py	Fri Nov 05 04:55:52 2004 +0000
+++ b/roundup/backends/indexer_dbm.py	Fri Nov 05 05:10:07 2004 +0000
@@ -14,7 +14,7 @@
 #     that promote freedom, but obviously am giving up any rights
 #     to compel such.
 # 
-#$Id: indexer_dbm.py,v 1.1 2004-03-19 04:47:59 richard Exp $
+#$Id: indexer_dbm.py,v 1.2 2004-11-05 05:10:07 richard Exp $
 '''This module provides an indexer class, RoundupIndexer, that stores text
 indices in a roundup instance.  This class makes searching the content of
 messages, string properties and text files possible.
@@ -24,6 +24,18 @@
 import os, shutil, re, mimetypes, marshal, zlib, errno
 from roundup.hyperdb import Link, Multilink
 
+stopwords = [  # common English words that the indexing loop skips; every entry is upper-case
+"A", "AND", "ARE", "AS", "AT", "BE", "BUT", "BY",
+"FOR", "IF", "IN", "INTO", "IS", "IT",
+"NO", "NOT", "OF", "ON", "OR", "SUCH",
+"THAT", "THE", "THEIR", "THEN", "THERE", "THESE",
+"THEY", "THIS", "TO", "WAS", "WILL", "WITH" 
+]  # NOTE(review): lookup below is exact-case; presumably words are upper-cased before the check - confirm
+is_stopword = {}  # temporary dict keyed by stopword, built only to get hashed membership tests
+for word in stopwords:
+    is_stopword[word] = None  # value is never read; only the key's presence matters
+is_stopword = is_stopword.has_key  # rebind name to the bound method: is_stopword(w) is true iff w is listed (dict.has_key is Python 2 only)
+
 class Indexer:
     '''Indexes information from roundup's hyperdb to allow efficient
     searching.
@@ -95,6 +107,8 @@
         # find the unique words
         filedict = {}
         for word in words:
+            if is_stopword(word):
+                continue
             if filedict.has_key(word):
                 filedict[word] = filedict[word]+1
             else:

Roundup Issue Tracker: http://roundup-tracker.org/