Mercurial > p > roundup > code
changeset 3058:1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2 to indexer_common.
| author | Johannes Gijsbers <jlgijsbers@users.sourceforge.net> |
|---|---|
| date | Tue, 04 Jan 2005 16:48:46 +0000 |
| parents | b77961cdb9a0 |
| children | 213468d6e7c9 |
| files | roundup/backends/back_tsearch2.py roundup/backends/indexer_common.py roundup/backends/indexer_dbm.py |
| diffstat | 3 files changed, 103 insertions(+), 148 deletions(-) [+] |
line wrap: on
line diff
--- a/roundup/backends/back_tsearch2.py Tue Jan 04 03:29:07 2005 +0000 +++ b/roundup/backends/back_tsearch2.py Tue Jan 04 16:48:46 2005 +0000 @@ -9,6 +9,7 @@ from roundup.backends import back_postgresql, tsearch2_setup, indexer_rdbms from roundup.backends.back_postgresql import db_create, db_nuke, db_command from roundup.backends.back_postgresql import pg_command, db_exists, Class, IssueClass, FileClass +from roundup.backends.indexer_common import _isLink, Indexer # XXX: Should probably be on the Class class. def _indexedProps(spec): @@ -24,10 +25,6 @@ query_dict['triggername'] = "%(tablename)s_tsvectorupdate" % query_dict return query_dict -def _isLink(propclass): - return (isinstance(propclass, hyperdb.Link) or - isinstance(propclass, hyperdb.Multilink)) - class Database(back_postgresql.Database): def __init__(self, config, journaltag=None): back_postgresql.Database.__init__(self, config, journaltag) @@ -86,86 +83,16 @@ cols.append(('idxFTI', 'tsvector')) return cols, mls -class Indexer: +class Indexer(Indexer): def __init__(self, db): self.db = db - def force_reindex(self): - pass - + # This indexer never needs to reindex. def should_reindex(self): - pass - - def save_index(self): - pass - - def add_text(self, identifier, text, mime_type=None): - pass - - def close(self): - pass - - def search(self, search_terms, klass, ignore={}, - dre=re.compile(r'([^\d]+)(\d+)')): - '''Display search results looking for [search, terms] associated - with the hyperdb Class "klass". Ignore hits on {class: property}. - - "dre" is a helper, not an argument. 
- ''' - # do the index lookup - hits = self.find(search_terms, klass) - if not hits: - return {} - - designator_propname = {} - for nm, propclass in klass.getprops().items(): - if (isinstance(propclass, hyperdb.Link) - or isinstance(propclass, hyperdb.Multilink)): - designator_propname[propclass.classname] = nm + return False - # build a dictionary of nodes and their associated messages - # and files - nodeids = {} # this is the answer - propspec = {} # used to do the klass.find - for propname in designator_propname.values(): - propspec[propname] = {} # used as a set (value doesn't matter) - - for classname, nodeid in hits: - # if it's a property on klass, it's easy - if classname == klass.classname: - if not nodeids.has_key(nodeid): - nodeids[nodeid] = {} - continue - - # make sure the class is a linked one, otherwise ignore - if not designator_propname.has_key(classname): - continue - - # it's a linked class - set up to do the klass.find - linkprop = designator_propname[classname] # eg, msg -> messages - propspec[linkprop][nodeid] = 1 - - # retain only the meaningful entries - for propname, idset in propspec.items(): - if not idset: - del propspec[propname] - - # klass.find tells me the klass nodeids the linked nodes relate to - for resid in klass.find(**propspec): - resid = str(resid) - if not nodeids.has_key(id): - nodeids[resid] = {} - node_dict = nodeids[resid] - # now figure out where it came from - for linkprop in propspec.keys(): - for nodeid in klass.get(resid, linkprop): - if propspec[linkprop].has_key(nodeid): - # OK, this node[propname] has a winner - if not node_dict.has_key(linkprop): - node_dict[linkprop] = [nodeid] - else: - node_dict[linkprop].append(nodeid) - return nodeids + def getHits(self, search_terms, klass): + return self.find(search_terms, klass) def find(self, search_terms, klass): if not search_terms: @@ -178,7 +105,7 @@ if _isLink(propclass): nodeids.extend(self.tsearchQuery(propclass.classname, search_terms)) - return nodeids + return 
dict(enumerate(nodeids)) def tsearchQuery(self, classname, search_terms): query = """SELECT id FROM _%(classname)s @@ -196,8 +123,25 @@ if 'type' in klass.getprops(): nodeids = [nodeid for nodeid in nodeids if klass.get(nodeid, 'type') == 'text/plain'] - - return [(classname, nodeid) for nodeid in nodeids] + + # XXX: We haven't implemented property-level search, so I'm just faking + # it here with a property named 'XXX'. We still need to fix the other + # backends and indexer_common.Indexer.search to only want to unpack two + # values. + return [(classname, nodeid, 'XXX') for nodeid in nodeids] + + # These only exist to satisfy the interface that's expected from indexers. + def force_reindex(self): + pass + + def save_index(self): + pass + + def add_text(self, identifier, text, mime_type=None): + pass + + def close(self): + pass class FileClass(hyperdb.FileClass, Class): '''This class defines a large chunk of data. To support this, it has a
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roundup/backends/indexer_common.py Tue Jan 04 16:48:46 2005 +0000 @@ -0,0 +1,74 @@ +import re + +from roundup import hyperdb + +def _isLink(propclass): + return (isinstance(propclass, hyperdb.Link) or + isinstance(propclass, hyperdb.Multilink)) + +class Indexer: + def getHits(self, search_terms, klass): + return self.find(search_terms) + + def search(self, search_terms, klass, ignore={}): + '''Display search results looking for [search, terms] associated + with the hyperdb Class "klass". Ignore hits on {class: property}. + + "dre" is a helper, not an argument. + ''' + # do the index lookup + hits = self.getHits(search_terms, klass) + if not hits: + return {} + + designator_propname = {} + for nm, propclass in klass.getprops().items(): + if _isLink(propclass): + designator_propname[propclass.classname] = nm + + # build a dictionary of nodes and their associated messages + # and files + nodeids = {} # this is the answer + propspec = {} # used to do the klass.find + for propname in designator_propname.values(): + propspec[propname] = {} # used as a set (value doesn't matter) + for classname, nodeid, property in hits.values(): + # skip this result if we don't care about this class/property + if ignore.has_key((classname, property)): + continue + + # if it's a property on klass, it's easy + if classname == klass.classname: + if not nodeids.has_key(nodeid): + nodeids[nodeid] = {} + continue + + # make sure the class is a linked one, otherwise ignore + if not designator_propname.has_key(classname): + continue + + # it's a linked class - set up to do the klass.find + linkprop = designator_propname[classname] # eg, msg -> messages + propspec[linkprop][nodeid] = 1 + + # retain only the meaningful entries + for propname, idset in propspec.items(): + if not idset: + del propspec[propname] + + # klass.find tells me the klass nodeids the linked nodes relate to + for resid in klass.find(**propspec): + resid = str(resid) + if not 
nodeids.has_key(id): + nodeids[resid] = {} + node_dict = nodeids[resid] + # now figure out where it came from + for linkprop in propspec.keys(): + for nodeid in klass.get(resid, linkprop): + if propspec[linkprop].has_key(nodeid): + # OK, this node[propname] has a winner + if not node_dict.has_key(linkprop): + node_dict[linkprop] = [nodeid] + else: + node_dict[linkprop].append(nodeid) + return nodeids
--- a/roundup/backends/indexer_dbm.py Tue Jan 04 03:29:07 2005 +0000 +++ b/roundup/backends/indexer_dbm.py Tue Jan 04 16:48:46 2005 +0000 @@ -14,7 +14,7 @@ # that promote freedom, but obviously am giving up any rights # to compel such. # -#$Id: indexer_dbm.py,v 1.2 2004-11-05 05:10:07 richard Exp $ +#$Id: indexer_dbm.py,v 1.3 2005-01-04 16:48:46 jlgijsbers Exp $ '''This module provides an indexer class, RoundupIndexer, that stores text indices in a roundup instance. This class makes searching the content of messages, string properties and text files possible. @@ -23,6 +23,7 @@ import os, shutil, re, mimetypes, marshal, zlib, errno from roundup.hyperdb import Link, Multilink +from roundup.backends.indexer_common import Indexer stopwords = [ "A", "AND", "ARE", "AS", "AT", "BE", "BUT", "BY", @@ -36,7 +37,7 @@ is_stopword[word] = None is_stopword = is_stopword.has_key -class Indexer: +class Indexer(Indexer): '''Indexes information from roundup's hyperdb to allow efficient searching. @@ -151,70 +152,6 @@ # place return re.findall(r'\b\w{2,25}\b', text) - def search(self, search_terms, klass, ignore={}, - dre=re.compile(r'([^\d]+)(\d+)')): - '''Display search results looking for [search, terms] associated - with the hyperdb Class "klass". Ignore hits on {class: property}. - - "dre" is a helper, not an argument. 
- ''' - # do the index lookup - hits = self.find(search_terms) - if not hits: - return {} - - designator_propname = {} - for nm, propclass in klass.getprops().items(): - if isinstance(propclass, Link) or isinstance(propclass, Multilink): - designator_propname[propclass.classname] = nm - - # build a dictionary of nodes and their associated messages - # and files - nodeids = {} # this is the answer - propspec = {} # used to do the klass.find - for propname in designator_propname.values(): - propspec[propname] = {} # used as a set (value doesn't matter) - for classname, nodeid, property in hits.values(): - # skip this result if we don't care about this class/property - if ignore.has_key((classname, property)): - continue - - # if it's a property on klass, it's easy - if classname == klass.classname: - if not nodeids.has_key(nodeid): - nodeids[nodeid] = {} - continue - - # make sure the class is a linked one, otherwise ignore - if not designator_propname.has_key(classname): - continue - - # it's a linked class - set up to do the klass.find - linkprop = designator_propname[classname] # eg, msg -> messages - propspec[linkprop][nodeid] = 1 - - # retain only the meaningful entries - for propname, idset in propspec.items(): - if not idset: - del propspec[propname] - - # klass.find tells me the klass nodeids the linked nodes relate to - for resid in klass.find(**propspec): - resid = str(resid) - if not nodeids.has_key(id): - nodeids[resid] = {} - node_dict = nodeids[resid] - # now figure out where it came from - for linkprop in propspec.keys(): - for nodeid in klass.get(resid, linkprop): - if propspec[linkprop].has_key(nodeid): - # OK, this node[propname] has a winner - if not node_dict.has_key(linkprop): - node_dict[linkprop] = [nodeid] - else: - node_dict[linkprop].append(nodeid) - return nodeids - # we override this to ignore not 2 < word < 25 and also to fix a bug - # the (fail) case. def find(self, wordlist):
