Mercurial > p > roundup > code
annotate roundup/backends/indexer_common.py @ 3092:a8c2371f45b6
Some cleanup:
* indexer_rdbms.Indexer now subclasses from indexer_common.Indexer instead of
indexer_dbm.Indexer
* rdbms_common doesn't call save_index anymore
* so we can remove save_index from indexer_rdbms and back_tsearch2
* indexer_rdbms.Indexer.rollback() wasn't used at all, so remove it
* move is_stopword code to indexer_common
| author | Johannes Gijsbers <jlgijsbers@users.sourceforge.net> |
|---|---|
| date | Sat, 08 Jan 2005 16:16:59 +0000 |
| parents | 6da931530497 |
| children | 5cd1c83dea50 |
| rev | line source |
|---|---|
|
3092
a8c2371f45b6
Some cleanup:
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
3088
diff
changeset
|
1 #$Id: indexer_common.py,v 1.4 2005-01-08 16:16:59 jlgijsbers Exp $ |
|
3058
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
2 import re |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
3 |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
4 from roundup import hyperdb |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
5 |
|
3092
a8c2371f45b6
Some cleanup:
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
3088
diff
changeset
|
6 stopwords = [ |
|
a8c2371f45b6
Some cleanup:
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
3088
diff
changeset
|
7 "A", "AND", "ARE", "AS", "AT", "BE", "BUT", "BY", |
|
a8c2371f45b6
Some cleanup:
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
3088
diff
changeset
|
8 "FOR", "IF", "IN", "INTO", "IS", "IT", |
|
a8c2371f45b6
Some cleanup:
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
3088
diff
changeset
|
9 "NO", "NOT", "OF", "ON", "OR", "SUCH", |
|
a8c2371f45b6
Some cleanup:
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
3088
diff
changeset
|
10 "THAT", "THE", "THEIR", "THEN", "THERE", "THESE", |
|
a8c2371f45b6
Some cleanup:
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
3088
diff
changeset
|
11 "THEY", "THIS", "TO", "WAS", "WILL", "WITH" |
|
a8c2371f45b6
Some cleanup:
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
3088
diff
changeset
|
12 ] |
|
a8c2371f45b6
Some cleanup:
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
3088
diff
changeset
|
13 |
|
a8c2371f45b6
Some cleanup:
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
3088
diff
changeset
|
14 is_stopword = {} |
|
a8c2371f45b6
Some cleanup:
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
3088
diff
changeset
|
15 for word in stopwords: |
|
a8c2371f45b6
Some cleanup:
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
3088
diff
changeset
|
16 is_stopword[word] = None |
|
a8c2371f45b6
Some cleanup:
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
3088
diff
changeset
|
17 is_stopword = is_stopword.has_key |
|
a8c2371f45b6
Some cleanup:
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
3088
diff
changeset
|
18 |
|
3058
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
19 def _isLink(propclass): |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
20 return (isinstance(propclass, hyperdb.Link) or |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
21 isinstance(propclass, hyperdb.Multilink)) |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
22 |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
23 class Indexer: |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
24 def getHits(self, search_terms, klass): |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
25 return self.find(search_terms) |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
26 |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
27 def search(self, search_terms, klass, ignore={}): |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
28 '''Display search results looking for [search, terms] associated |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
29 with the hyperdb Class "klass". Ignore hits on {class: property}. |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
30 |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
31 "dre" is a helper, not an argument. |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
32 ''' |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
33 # do the index lookup |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
34 hits = self.getHits(search_terms, klass) |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
35 if not hits: |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
36 return {} |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
37 |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
38 designator_propname = {} |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
39 for nm, propclass in klass.getprops().items(): |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
40 if _isLink(propclass): |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
41 designator_propname[propclass.classname] = nm |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
42 |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
43 # build a dictionary of nodes and their associated messages |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
44 # and files |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
45 nodeids = {} # this is the answer |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
46 propspec = {} # used to do the klass.find |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
47 for propname in designator_propname.values(): |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
48 propspec[propname] = {} # used as a set (value doesn't matter) |
|
3076
2817a4db901d
Change indexer_common.search() to take a list of nodeids...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
3058
diff
changeset
|
49 for classname, nodeid, property in hits: |
|
3058
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
50 # skip this result if we don't care about this class/property |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
51 if ignore.has_key((classname, property)): |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
52 continue |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
53 |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
54 # if it's a property on klass, it's easy |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
55 if classname == klass.classname: |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
56 if not nodeids.has_key(nodeid): |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
57 nodeids[nodeid] = {} |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
58 continue |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
59 |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
60 # make sure the class is a linked one, otherwise ignore |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
61 if not designator_propname.has_key(classname): |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
62 continue |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
63 |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
64 # it's a linked class - set up to do the klass.find |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
65 linkprop = designator_propname[classname] # eg, msg -> messages |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
66 propspec[linkprop][nodeid] = 1 |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
67 |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
68 # retain only the meaningful entries |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
69 for propname, idset in propspec.items(): |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
70 if not idset: |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
71 del propspec[propname] |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
72 |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
73 # klass.find tells me the klass nodeids the linked nodes relate to |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
74 for resid in klass.find(**propspec): |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
75 resid = str(resid) |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
76 if not nodeids.has_key(id): |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
77 nodeids[resid] = {} |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
78 node_dict = nodeids[resid] |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
79 # now figure out where it came from |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
80 for linkprop in propspec.keys(): |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
81 for nodeid in klass.get(resid, linkprop): |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
82 if propspec[linkprop].has_key(nodeid): |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
83 # OK, this node[propname] has a winner |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
84 if not node_dict.has_key(linkprop): |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
85 node_dict[linkprop] = [nodeid] |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
86 else: |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
87 node_dict[linkprop].append(nodeid) |
|
1c063814d567
Move search method duplicated in indexer_dbm and indexer_tsearch2...
Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
parents:
diff
changeset
|
88 return nodeids |
