Mercurial > p > roundup > code
annotate roundup/backends/indexer_rdbms.py @ 2623:4e1030d49cea
fix: Option defaults were applied as strings...
...not converted to internal representation;
add Option methods str2value and value2str for value conversion
from and to the strings kept in .ini file;
options MAIL_USERNAME, MAIL_TLS_KEYFILE and MAIL_TLS_CERTFILE
default to empty strings: NODEFAULT value is a configuration
error, and for these options it is ok to keep them unset.
| author | Alexander Smishlajev <a1s@users.sourceforge.net> |
|---|---|
| date | Sun, 25 Jul 2004 14:36:50 +0000 |
| parents | 18addf2a8596 |
| children | d530b68e4b42 |
| rev | line source |
|---|---|
|
2093
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
1 ''' This implements the full-text indexer over two RDBMS tables. The first |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
2 is a mapping of words to occurance IDs. The second maps the IDs to (Class, |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
3 propname, itemid) instances. |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
4 ''' |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
5 import re |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
6 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
7 from indexer_dbm import Indexer |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
8 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
9 class Indexer(Indexer): |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
10 disallows = {'THE':1, 'THIS':1, 'ZZZ':1, 'THAT':1, 'WITH':1} |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
11 def __init__(self, db): |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
12 self.db = db |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
13 self.reindex = 0 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
14 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
15 def close(self): |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
16 '''close the indexing database''' |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
17 # just nuke the circular reference |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
18 self.db = None |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
19 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
20 def force_reindex(self): |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
21 '''Force a reindexing of the database. This essentially |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
22 empties the tables ids and index and sets a flag so |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
23 that the databases are reindexed''' |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
24 self.reindex = 1 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
25 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
26 def should_reindex(self): |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
27 '''returns True if the indexes need to be rebuilt''' |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
28 return self.reindex |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
29 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
30 def add_text(self, identifier, text, mime_type='text/plain'): |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
31 ''' "identifier" is (classname, itemid, property) ''' |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
32 if mime_type != 'text/plain': |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
33 return |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
34 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
35 # first, find the id of the (classname, itemid, property) |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
36 a = self.db.arg |
|
2098
18addf2a8596
Implemented proper datatypes in mysql and postgresql backends...
Richard Jones <richard@users.sourceforge.net>
parents:
2093
diff
changeset
|
37 sql = 'select _textid from __textids where _class=%s and '\ |
|
2093
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
38 '_itemid=%s and _prop=%s'%(a, a, a) |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
39 self.db.cursor.execute(sql, identifier) |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
40 r = self.db.cursor.fetchone() |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
41 if not r: |
|
2098
18addf2a8596
Implemented proper datatypes in mysql and postgresql backends...
Richard Jones <richard@users.sourceforge.net>
parents:
2093
diff
changeset
|
42 id = self.db.newid('__textids') |
|
18addf2a8596
Implemented proper datatypes in mysql and postgresql backends...
Richard Jones <richard@users.sourceforge.net>
parents:
2093
diff
changeset
|
43 sql = 'insert into __textids (_textid, _class, _itemid, _prop)'\ |
|
2093
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
44 ' values (%s, %s, %s, %s)'%(a, a, a, a) |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
45 self.db.cursor.execute(sql, (id, ) + identifier) |
|
2098
18addf2a8596
Implemented proper datatypes in mysql and postgresql backends...
Richard Jones <richard@users.sourceforge.net>
parents:
2093
diff
changeset
|
46 self.db.cursor.execute('select max(_textid) from __textids') |
|
18addf2a8596
Implemented proper datatypes in mysql and postgresql backends...
Richard Jones <richard@users.sourceforge.net>
parents:
2093
diff
changeset
|
47 id = self.db.cursor.fetchone()[0] |
|
2093
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
48 else: |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
49 id = int(r[0]) |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
50 # clear out any existing indexed values |
|
2098
18addf2a8596
Implemented proper datatypes in mysql and postgresql backends...
Richard Jones <richard@users.sourceforge.net>
parents:
2093
diff
changeset
|
51 sql = 'delete from __words where _textid=%s'%a |
|
2093
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
52 self.db.cursor.execute(sql, (id, )) |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
53 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
54 # ok, find all the words in the text |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
55 wordlist = re.findall(r'\b\w{2,25}\b', str(text).upper()) |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
56 words = {} |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
57 for word in wordlist: |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
58 if not self.disallows.has_key(word): |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
59 words[word] = 1 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
60 words = words.keys() |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
61 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
62 # for each word, add an entry in the db |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
63 for word in words: |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
64 # don't dupe |
|
2098
18addf2a8596
Implemented proper datatypes in mysql and postgresql backends...
Richard Jones <richard@users.sourceforge.net>
parents:
2093
diff
changeset
|
65 sql = 'select * from __words where _word=%s and _textid=%s'%(a, a) |
|
2093
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
66 self.db.cursor.execute(sql, (word, id)) |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
67 if self.db.cursor.fetchall(): |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
68 continue |
|
2098
18addf2a8596
Implemented proper datatypes in mysql and postgresql backends...
Richard Jones <richard@users.sourceforge.net>
parents:
2093
diff
changeset
|
69 sql = 'insert into __words (_word, _textid) values (%s, %s)'%(a, a) |
|
2093
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
70 self.db.cursor.execute(sql, (word, id)) |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
71 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
72 def find(self, wordlist): |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
73 '''look up all the words in the wordlist. |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
74 If none are found return an empty dictionary |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
75 * more rules here |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
76 ''' |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
77 l = [word.upper() for word in wordlist if 26 > len(word) > 2] |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
78 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
79 a = ','.join([self.db.arg] * len(l)) |
|
2098
18addf2a8596
Implemented proper datatypes in mysql and postgresql backends...
Richard Jones <richard@users.sourceforge.net>
parents:
2093
diff
changeset
|
80 sql = 'select distinct(_textid) from __words where _word in (%s)'%a |
|
2093
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
81 self.db.cursor.execute(sql, tuple(l)) |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
82 r = self.db.cursor.fetchall() |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
83 if not r: |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
84 return {} |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
85 a = ','.join([self.db.arg] * len(r)) |
|
2098
18addf2a8596
Implemented proper datatypes in mysql and postgresql backends...
Richard Jones <richard@users.sourceforge.net>
parents:
2093
diff
changeset
|
86 sql = 'select _class, _itemid, _prop from __textids '\ |
|
2093
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
87 'where _textid in (%s)'%a |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
88 self.db.cursor.execute(sql, tuple([int(id) for (id,) in r])) |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
89 # self.search_index has the results as {some id: identifier} ... |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
90 # sigh |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
91 r = {} |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
92 k = 0 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
93 for c,n,p in self.db.cursor.fetchall(): |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
94 key = (str(c), str(n), str(p)) |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
95 r[k] = key |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
96 k += 1 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
97 return r |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
98 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
99 def save_index(self): |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
100 # the normal RDBMS backend transaction mechanisms will handle this |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
101 pass |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
102 |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
103 def rollback(self): |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
104 # the normal RDBMS backend transaction mechanisms will handle this |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
105 pass |
|
3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
106 |
