Mercurial > p > roundup > code
view roundup/backends/indexer_rdbms.py @ 2623:4e1030d49cea
fix: Option defaults were applied as strings...
...not converted to internal representation;
add Option methods str2value and value2str for value conversion
from and to the strings kept in .ini file;
options MAIL_USERNAME, MAIL_TLS_KEYFILE and MAIL_TLS_CERTFILE
default to empty strings: NODEFAULT value is a configuration
error, and for these options it is ok to keep them unset.
| author | Alexander Smishlajev <a1s@users.sourceforge.net> |
|---|---|
| date | Sun, 25 Jul 2004 14:36:50 +0000 |
| parents | 18addf2a8596 |
| children | d530b68e4b42 |
line wrap: on
line source
''' This implements the full-text indexer over two RDBMS tables. The first
is a mapping of words to occurrence IDs. The second maps the IDs to (Class,
propname, itemid) instances.
'''
import re

from indexer_dbm import Indexer

class Indexer(Indexer):
    '''Full-text indexer backed by the "__textids" and "__words" tables.

    "__textids" maps a numeric _textid to a (_class, _itemid, _prop)
    triple; "__words" maps upper-cased words to the _textid they occur in.

    The "db" object handed to the constructor must provide:
      - "arg": the string interpolated into SQL wherever a bound
        parameter goes (presumably the DB-API parameter marker of the
        underlying driver -- confirm against the backend)
      - "cursor": an object with execute() / fetchone() / fetchall()
      - "newid(name)": used to allocate a new id for "__textids"
    '''
    # words that are never indexed (compared after upper-casing)
    disallows = {'THE':1, 'THIS':1, 'ZZZ':1, 'THAT':1, 'WITH':1}

    def __init__(self, db):
        '''Remember the database and start with the "reindex" flag clear.'''
        self.db = db
        self.reindex = 0

    def close(self):
        '''close the indexing database'''
        # just nuke the circular reference
        self.db = None

    def force_reindex(self):
        '''Force a reindexing of the database.  This only sets the flag
        reported by should_reindex(); nothing is deleted here.'''
        self.reindex = 1

    def should_reindex(self):
        '''returns a true value (1) if the indexes need to be rebuilt,
        a false value (0) otherwise'''
        return self.reindex

    def add_text(self, identifier, text, mime_type='text/plain'):
        ''' Index the words of "text" under "identifier".

        "identifier" is (classname, itemid, property) and must be a
        tuple, since it is concatenated onto another tuple below.

        Only 'text/plain' content is indexed; any other mime_type is
        silently ignored.  Previously indexed words for an existing
        identifier are removed before the new ones are added.
        '''
        if mime_type != 'text/plain':
            return

        db = self.db
        a = db.arg

        # first, find the id of the (classname, itemid, property)
        sql = 'select _textid from __textids where _class=%s and '\
            '_itemid=%s and _prop=%s'%(a, a, a)
        db.cursor.execute(sql, identifier)
        row = db.cursor.fetchone()
        if not row:
            # not indexed before: register the identifier
            textid = db.newid('__textids')
            sql = 'insert into __textids (_textid, _class, _itemid, _prop)'\
                ' values (%s, %s, %s, %s)'%(a, a, a, a)
            db.cursor.execute(sql, (textid, ) + identifier)
            # the id actually used from here on is re-read from the table
            # NOTE(review): max(_textid) is only the row just inserted if
            # nothing else inserts concurrently and ids increase -- confirm
            db.cursor.execute('select max(_textid) from __textids')
            textid = db.cursor.fetchone()[0]
        else:
            textid = int(row[0])
            # clear out any existing indexed values
            sql = 'delete from __words where _textid=%s'%a
            db.cursor.execute(sql, (textid, ))

        # ok, find all the words in the text (2 to 25 word characters,
        # upper-cased), dropping duplicates and disallowed words
        wordlist = re.findall(r'\b\w{2,25}\b', str(text).upper())
        words = {}
        for word in wordlist:
            if word not in self.disallows:
                words[word] = 1

        # for each word, add an entry in the db; both statements are the
        # same for every word so build them once
        check_sql = 'select * from __words where _word=%s and _textid=%s'%(
            a, a)
        insert_sql = 'insert into __words (_word, _textid) values (%s, %s)'%(
            a, a)
        for word in words.keys():
            # don't dupe
            db.cursor.execute(check_sql, (word, textid))
            if db.cursor.fetchall():
                continue
            db.cursor.execute(insert_sql, (word, textid))

    def find(self, wordlist):
        '''look up the words in the wordlist.

        Returns a dictionary {counter: (classname, itemid, property)}
        with one entry for every indexed text that contains at least one
        of the words.  If none are found, or no word in the wordlist is
        searchable, return an empty dictionary.

        Only words of 3 to 25 characters are searched for.
        '''
        # NOTE(review): add_text() indexes words of 2-25 characters, so
        # 2-character words are stored but can never be found here.
        words = [word.upper() for word in wordlist if 26 > len(word) > 2]
        if not words:
            # nothing searchable; an empty "in ()" list is a syntax error
            # on most SQL engines, so don't even run the query
            return {}

        db = self.db
        a = ','.join([db.arg] * len(words))
        sql = 'select distinct(_textid) from __words where _word in (%s)'%a
        db.cursor.execute(sql, tuple(words))
        rows = db.cursor.fetchall()
        if not rows:
            return {}

        a = ','.join([db.arg] * len(rows))
        sql = 'select _class, _itemid, _prop from __textids '\
            'where _textid in (%s)'%a
        db.cursor.execute(sql, tuple([int(textid) for (textid,) in rows]))

        # the results are handed back as {some id: identifier}; the key is
        # just a running counter
        matches = {}
        k = 0
        for c, n, p in db.cursor.fetchall():
            matches[k] = (str(c), str(n), str(p))
            k += 1
        return matches

    def save_index(self):
        '''Nothing to do here.'''
        # the normal RDBMS backend transaction mechanisms will handle this
        pass

    def rollback(self):
        '''Nothing to do here.'''
        # the normal RDBMS backend transaction mechanisms will handle this
        pass
