changeset 3058:1c063814d567

Move search method duplicated in indexer_dbm and indexer_tsearch2 to indexer_common.
author Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
date Tue, 04 Jan 2005 16:48:46 +0000
parents b77961cdb9a0
children 213468d6e7c9
files roundup/backends/back_tsearch2.py roundup/backends/indexer_common.py roundup/backends/indexer_dbm.py
diffstat 3 files changed, 103 insertions(+), 148 deletions(-) [+]
line wrap: on
line diff
--- a/roundup/backends/back_tsearch2.py	Tue Jan 04 03:29:07 2005 +0000
+++ b/roundup/backends/back_tsearch2.py	Tue Jan 04 16:48:46 2005 +0000
@@ -9,6 +9,7 @@
 from roundup.backends import back_postgresql, tsearch2_setup, indexer_rdbms
 from roundup.backends.back_postgresql import db_create, db_nuke, db_command
 from roundup.backends.back_postgresql import pg_command, db_exists, Class, IssueClass, FileClass
+from roundup.backends.indexer_common import _isLink, Indexer
 
 # XXX: Should probably be on the Class class.
 def _indexedProps(spec):
@@ -24,10 +25,6 @@
     query_dict['triggername'] = "%(tablename)s_tsvectorupdate" % query_dict
     return query_dict
 
-def _isLink(propclass):
-    return (isinstance(propclass, hyperdb.Link) or
-            isinstance(propclass, hyperdb.Multilink))
-
 class Database(back_postgresql.Database):
     def __init__(self, config, journaltag=None):
         back_postgresql.Database.__init__(self, config, journaltag)
@@ -86,86 +83,16 @@
         cols.append(('idxFTI', 'tsvector'))
         return cols, mls
         
-class Indexer:
+class Indexer(Indexer):
     def __init__(self, db):
         self.db = db
 
-    def force_reindex(self):
-        pass
-        
+    # This indexer never needs to reindex.
     def should_reindex(self):
-        pass
-
-    def save_index(self):
-        pass
-
-    def add_text(self, identifier, text, mime_type=None):
-        pass
-
-    def close(self):
-        pass
-    
-    def search(self, search_terms, klass, ignore={},
-               dre=re.compile(r'([^\d]+)(\d+)')):
-        '''Display search results looking for [search, terms] associated
-        with the hyperdb Class "klass". Ignore hits on {class: property}.
-
-        "dre" is a helper, not an argument.
-        '''
-        # do the index lookup
-        hits = self.find(search_terms, klass)
-        if not hits:
-            return {}
-
-        designator_propname = {}
-        for nm, propclass in klass.getprops().items():
-            if (isinstance(propclass, hyperdb.Link)
-                or isinstance(propclass, hyperdb.Multilink)):
-                designator_propname[propclass.classname] = nm
+        return False
 
-        # build a dictionary of nodes and their associated messages
-        # and files
-        nodeids = {}      # this is the answer
-        propspec = {}     # used to do the klass.find
-        for propname in designator_propname.values():
-            propspec[propname] = {}   # used as a set (value doesn't matter)
-
-        for classname, nodeid in hits:
-            # if it's a property on klass, it's easy
-            if classname == klass.classname:
-                if not nodeids.has_key(nodeid):
-                    nodeids[nodeid] = {}
-                continue
-
-            # make sure the class is a linked one, otherwise ignore
-            if not designator_propname.has_key(classname):
-                continue
-
-            # it's a linked class - set up to do the klass.find
-            linkprop = designator_propname[classname]   # eg, msg -> messages
-            propspec[linkprop][nodeid] = 1
-
-        # retain only the meaningful entries
-        for propname, idset in propspec.items():
-            if not idset:
-                del propspec[propname]
-        
-        # klass.find tells me the klass nodeids the linked nodes relate to
-        for resid in klass.find(**propspec):
-            resid = str(resid)
-            if not nodeids.has_key(id):
-                nodeids[resid] = {}
-            node_dict = nodeids[resid]
-            # now figure out where it came from
-            for linkprop in propspec.keys():
-                for nodeid in klass.get(resid, linkprop):
-                    if propspec[linkprop].has_key(nodeid):
-                        # OK, this node[propname] has a winner
-                        if not node_dict.has_key(linkprop):
-                            node_dict[linkprop] = [nodeid]
-                        else:
-                            node_dict[linkprop].append(nodeid)
-        return nodeids
+    def getHits(self, search_terms, klass):
+        return self.find(search_terms, klass)    
     
     def find(self, search_terms, klass):
         if not search_terms:
@@ -178,7 +105,7 @@
             if _isLink(propclass):
                 nodeids.extend(self.tsearchQuery(propclass.classname, search_terms))
 
-        return nodeids
+        return dict(enumerate(nodeids))
 
     def tsearchQuery(self, classname, search_terms):
         query = """SELECT id FROM _%(classname)s
@@ -196,8 +123,25 @@
         if 'type' in klass.getprops():
             nodeids = [nodeid for nodeid in nodeids
                        if klass.get(nodeid, 'type') == 'text/plain']
-            
-        return [(classname, nodeid) for nodeid in nodeids]
+
+        # XXX: We haven't implemented property-level search, so I'm just faking
+        # it here with a property named 'XXX'. We still need to fix the other
+        # backends and indexer_common.Indexer.search to only want to unpack two
+        # values.
+        return [(classname, nodeid, 'XXX') for nodeid in nodeids]
+
+    # These only exist to satisfy the interface that's expected from indexers.
+    def force_reindex(self):
+        pass
+    
+    def save_index(self):
+        pass
+
+    def add_text(self, identifier, text, mime_type=None):
+        pass
+
+    def close(self):
+        pass
 
 class FileClass(hyperdb.FileClass, Class):
     '''This class defines a large chunk of data. To support this, it has a
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/roundup/backends/indexer_common.py	Tue Jan 04 16:48:46 2005 +0000
@@ -0,0 +1,74 @@
+import re
+
+from roundup import hyperdb
+
+def _isLink(propclass):
+    return (isinstance(propclass, hyperdb.Link) or
+            isinstance(propclass, hyperdb.Multilink))
+
+class Indexer:    
+    def getHits(self, search_terms, klass):
+        return self.find(search_terms)
+    
+    def search(self, search_terms, klass, ignore={}):
+        '''Display search results looking for [search, terms] associated
+        with the hyperdb Class "klass". Ignore hits on {class: property}.
+
+        "dre" is a helper, not an argument.
+        '''
+        # do the index lookup
+        hits = self.getHits(search_terms, klass)
+        if not hits:
+            return {}
+
+        designator_propname = {}
+        for nm, propclass in klass.getprops().items():
+            if _isLink(propclass):
+                designator_propname[propclass.classname] = nm
+
+        # build a dictionary of nodes and their associated messages
+        # and files
+        nodeids = {}      # this is the answer
+        propspec = {}     # used to do the klass.find
+        for propname in designator_propname.values():
+            propspec[propname] = {}   # used as a set (value doesn't matter)
+        for classname, nodeid, property in hits.values():
+            # skip this result if we don't care about this class/property
+            if ignore.has_key((classname, property)):
+                continue
+
+            # if it's a property on klass, it's easy
+            if classname == klass.classname:
+                if not nodeids.has_key(nodeid):
+                    nodeids[nodeid] = {}
+                continue
+
+            # make sure the class is a linked one, otherwise ignore
+            if not designator_propname.has_key(classname):
+                continue
+
+            # it's a linked class - set up to do the klass.find
+            linkprop = designator_propname[classname]   # eg, msg -> messages
+            propspec[linkprop][nodeid] = 1
+
+        # retain only the meaningful entries
+        for propname, idset in propspec.items():
+            if not idset:
+                del propspec[propname]
+        
+        # klass.find tells me the klass nodeids the linked nodes relate to
+        for resid in klass.find(**propspec):
+            resid = str(resid)
+            if not nodeids.has_key(id):
+                nodeids[resid] = {}
+            node_dict = nodeids[resid]
+            # now figure out where it came from
+            for linkprop in propspec.keys():
+                for nodeid in klass.get(resid, linkprop):
+                    if propspec[linkprop].has_key(nodeid):
+                        # OK, this node[propname] has a winner
+                        if not node_dict.has_key(linkprop):
+                            node_dict[linkprop] = [nodeid]
+                        else:
+                            node_dict[linkprop].append(nodeid)
+        return nodeids
--- a/roundup/backends/indexer_dbm.py	Tue Jan 04 03:29:07 2005 +0000
+++ b/roundup/backends/indexer_dbm.py	Tue Jan 04 16:48:46 2005 +0000
@@ -14,7 +14,7 @@
 #     that promote freedom, but obviously am giving up any rights
 #     to compel such.
 # 
-#$Id: indexer_dbm.py,v 1.2 2004-11-05 05:10:07 richard Exp $
+#$Id: indexer_dbm.py,v 1.3 2005-01-04 16:48:46 jlgijsbers Exp $
 '''This module provides an indexer class, RoundupIndexer, that stores text
 indices in a roundup instance.  This class makes searching the content of
 messages, string properties and text files possible.
@@ -23,6 +23,7 @@
 
 import os, shutil, re, mimetypes, marshal, zlib, errno
 from roundup.hyperdb import Link, Multilink
+from roundup.backends.indexer_common import Indexer
 
 stopwords = [
 "A", "AND", "ARE", "AS", "AT", "BE", "BUT", "BY",
@@ -36,7 +37,7 @@
     is_stopword[word] = None
 is_stopword = is_stopword.has_key
 
-class Indexer:
+class Indexer(Indexer):
     '''Indexes information from roundup's hyperdb to allow efficient
     searching.
 
@@ -151,70 +152,6 @@
         # place
         return re.findall(r'\b\w{2,25}\b', text)
 
-    def search(self, search_terms, klass, ignore={},
-            dre=re.compile(r'([^\d]+)(\d+)')):
-        '''Display search results looking for [search, terms] associated
-        with the hyperdb Class "klass". Ignore hits on {class: property}.
-
-        "dre" is a helper, not an argument.
-        '''
-        # do the index lookup
-        hits = self.find(search_terms)
-        if not hits:
-            return {}
-
-        designator_propname = {}
-        for nm, propclass in klass.getprops().items():
-            if isinstance(propclass, Link) or isinstance(propclass, Multilink):
-                designator_propname[propclass.classname] = nm
-
-        # build a dictionary of nodes and their associated messages
-        # and files
-        nodeids = {}      # this is the answer
-        propspec = {}     # used to do the klass.find
-        for propname in designator_propname.values():
-            propspec[propname] = {}   # used as a set (value doesn't matter)
-        for classname, nodeid, property in hits.values():
-            # skip this result if we don't care about this class/property
-            if ignore.has_key((classname, property)):
-                continue
-
-            # if it's a property on klass, it's easy
-            if classname == klass.classname:
-                if not nodeids.has_key(nodeid):
-                    nodeids[nodeid] = {}
-                continue
-
-            # make sure the class is a linked one, otherwise ignore
-            if not designator_propname.has_key(classname):
-                continue
-
-            # it's a linked class - set up to do the klass.find
-            linkprop = designator_propname[classname]   # eg, msg -> messages
-            propspec[linkprop][nodeid] = 1
-
-        # retain only the meaningful entries
-        for propname, idset in propspec.items():
-            if not idset:
-                del propspec[propname]
-        
-        # klass.find tells me the klass nodeids the linked nodes relate to
-        for resid in klass.find(**propspec):
-            resid = str(resid)
-            if not nodeids.has_key(id):
-                nodeids[resid] = {}
-            node_dict = nodeids[resid]
-            # now figure out where it came from
-            for linkprop in propspec.keys():
-                for nodeid in klass.get(resid, linkprop):
-                    if propspec[linkprop].has_key(nodeid):
-                        # OK, this node[propname] has a winner
-                        if not node_dict.has_key(linkprop):
-                            node_dict[linkprop] = [nodeid]
-                        else:
-                            node_dict[linkprop].append(nodeid)
-        return nodeids
-
     # we override this to ignore not 2 < word < 25 and also to fix a bug -
     # the (fail) case.
     def find(self, wordlist):

Roundup Issue Tracker: http://roundup-tracker.org/