Mercurial > p > roundup > code
changeset 834:568eed5fb4fd
Optimize Class.find so that the propspec can contain a set of ids to match.
This is used by indexer.search so it can do just one find for all the
index matches.
The code was already confusing, but for common terms (lots of index matches)
the new implementation is enormously faster.
| author | Gordon B. McMillan <gmcm@users.sourceforge.net> |
|---|---|
| date | Tue, 09 Jul 2002 21:53:38 +0000 |
| parents | b80aaedba3db |
| children | 255bdcf39e8c |
| files | roundup/hyperdb.py roundup/indexer.py |
| diffstat | 2 files changed, 83 insertions(+), 31 deletions(-) [+] |
line wrap: on
line diff
--- a/roundup/hyperdb.py Tue Jul 09 21:38:43 2002 +0000 +++ b/roundup/hyperdb.py Tue Jul 09 21:53:38 2002 +0000 @@ -15,7 +15,7 @@ # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE, # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. # -# $Id: hyperdb.py,v 1.71 2002-07-09 03:02:52 richard Exp $ +# $Id: hyperdb.py,v 1.72 2002-07-09 21:53:38 gmcm Exp $ __doc__ = """ Hyperdatabase implementation, especially field types. @@ -785,24 +785,28 @@ # XXX: change from spec - allows multiple props to match def find(self, **propspec): - """Get the ids of nodes in this class which link to a given node. + """Get the ids of nodes in this class which link to the given nodes. - 'propspec' consists of keyword args propname=nodeid + 'propspec' consists of keyword args propname={nodeid:1,} 'propname' must be the name of a property in this class, or a KeyError is raised. That property must be a Link or Multilink property, or a TypeError is raised. - 'nodeid' must be the id of an existing node in the class linked - to by the given property, or an IndexError is raised. + Any node in this class whose 'propname' property links to any of the + nodeids will be returned. 
Used by the full text indexing, which knows + that "foo" occurs in msg1, msg3 and file7, so we have hits on these issues: + db.issue.find(messages={'1':1,'3':1}, files={'7':1}) """ propspec = propspec.items() - for propname, nodeid in propspec: + for propname, nodeids in propspec: # check the prop is OK prop = self.properties[propname] if not isinstance(prop, Link) and not isinstance(prop, Multilink): raise TypeError, "'%s' not a Link/Multilink property"%propname - if not self.db.hasnode(prop.classname, nodeid): - raise ValueError, '%s has no node %s'%(prop.classname, nodeid) + #XXX edit is expensive and of questionable use + #for nodeid in nodeids: + # if not self.db.hasnode(prop.classname, nodeid): + # raise ValueError, '%s has no node %s'%(prop.classname, nodeid) # ok, now do the find cldb = self.db.getclassdb(self.classname) @@ -811,16 +815,26 @@ node = self.db.getnode(self.classname, id, db=cldb) if node.has_key(self.db.RETIRED_FLAG): continue - for propname, nodeid in propspec: + for propname, nodeids in propspec: # can't test if the node doesn't have this property if not node.has_key(propname): continue + if type(nodeids) is type(''): + nodeids = {nodeids:1} prop = self.properties[propname] - property = node[propname] - if isinstance(prop, Link) and nodeid == property: + value = node[propname] + if isinstance(prop, Link) and nodeids.has_key(value): l.append(id) - elif isinstance(prop, Multilink) and nodeid in property: - l.append(id) + break + elif isinstance(prop, Multilink): + hit = 0 + for v in value: + if nodeids.has_key(v): + l.append(id) + hit = 1 + break + if hit: + break return l def stringFind(self, **requirements): @@ -1185,6 +1199,22 @@ # # $Log: not supported by cvs2svn $ +# Revision 1.71 2002/07/09 03:02:52 richard +# More indexer work: +# - all String properties may now be indexed too. Currently there's a bit of +# "issue" specific code in the actual searching which needs to be +# addressed. 
In a nutshell: +# + pass 'indexme="yes"' as a String() property initialisation arg, eg: +# file = FileClass(db, "file", name=String(), type=String(), +# comment=String(indexme="yes")) +# + the comment will then be indexed and be searchable, with the results +# related back to the issue that the file is linked to +# - as a result of this work, the FileClass has a default MIME type that may +# be overridden in a subclass, or by the use of a "type" property as is +# done in the default templates. +# - the regeneration of the indexes (if necessary) is done once the schema is +# set up in the dbinit. +# # Revision 1.70 2002/06/27 12:06:20 gmcm # Improve an error message. #
--- a/roundup/indexer.py Tue Jul 09 21:38:43 2002 +0000 +++ b/roundup/indexer.py Tue Jul 09 21:53:38 2002 +0000 @@ -14,13 +14,14 @@ # that promote freedom, but obviously am giving up any rights # to compel such. # -#$Id: indexer.py,v 1.7 2002-07-09 21:38:43 richard Exp $ +#$Id: indexer.py,v 1.8 2002-07-09 21:53:38 gmcm Exp $ ''' This module provides an indexer class, RoundupIndexer, that stores text indices in a roundup instance. This class makes searching the content of -messages and text files possible. +messages, string properties and text files possible. ''' import os, shutil, re, mimetypes, marshal, zlib, errno +from hyperdb import Link, Multilink class Indexer: ''' Indexes information from roundup's hyperdb to allow efficient @@ -30,6 +31,7 @@ files {identifier: (fileid, wordcount)} words {word: {fileid: count}} fileids {fileid: identifier} + where identifier is (classname, nodeid, propertyname) ''' def __init__(self, db_path): self.indexdb_path = os.path.join(db_path, 'indexes') @@ -139,12 +141,18 @@ if not hits: return {} - # this is specific to "issue" klass ... 
eugh - designator_propname = {'msg': 'messages', 'file': 'files'} + #designator_propname = {'msg': 'messages', 'file': 'files'} + designator_propname = {} + for nm, propclass in klass.getprops().items(): + if isinstance(propclass, Link) or isinstance(propclass, Multilink): + designator_propname[propclass.classname] = nm # build a dictionary of nodes and their associated messages # and files - nodeids = {} + nodeids = {} # this is the answer + propspec = {} # used to do the klass.find + for propname in designator_propname.values(): + propspec[propname] = {} # used as a set (value doesn't matter) for classname, nodeid, property in hits.values(): # skip this result if we don't care about this class/property if ignore.has_key((classname, property)): @@ -156,20 +164,30 @@ nodeids[nodeid] = {} continue - # it's a linked class - find the klass entries that are - # linked to it - linkprop = designator_propname[classname] - for resid in klass.find(**{linkprop: nodeid}): - resid = str(resid) - if not nodeids.has_key(id): - nodeids[resid] = {} + # it's a linked class - set up to do the klass.find + linkprop = designator_propname[classname] # eg, msg -> messages + propspec[linkprop][nodeid] = 1 - # update the links for this klass nodeid - node_dict = nodeids[resid] - if not node_dict.has_key(linkprop): - node_dict[linkprop] = [nodeid] - elif node_dict.has_key(linkprop): - node_dict[linkprop].append(nodeid) + # retain only the meaningful entries + for propname, idset in propspec.items(): + if not idset: + del propspec[propname] + + # klass.find tells me the klass nodeids the linked nodes relate to + for resid in klass.find(**propspec): + resid = str(resid) + if not nodeids.has_key(id): + nodeids[resid] = {} + node_dict = nodeids[resid] + # now figure out where it came from + for linkprop in propspec.keys(): + for nodeid in klass.get(resid, linkprop): + if propspec[linkprop].has_key(nodeid): + # OK, this node[propname] has a winner + if not node_dict.has_key(linkprop): + 
node_dict[linkprop] = [nodeid] + else: + node_dict[linkprop].append(nodeid) return nodeids # we override this to ignore not 2 < word < 25 and also to fix a bug - @@ -311,6 +329,10 @@ # #$Log: not supported by cvs2svn $ +#Revision 1.7 2002/07/09 21:38:43 richard +#Only save the index if the thing is loaded and changed. Also, don't load +#the index just for a save. +# #Revision 1.6 2002/07/09 04:26:44 richard #We're indexing numbers now, and _underscore words #
