Mercurial > p > roundup > code
view roundup/backends/indexer_rdbms.py @ 8540:e8d1da6e3571
bug: fix traceback in roundup-admin init with bad config values
The initialize command accepts values for config.ini file settings. If
they are not valid, you got a Python traceback.
ConfigurationError exceptions are now trapped. The admin.py's
usageError_feedback method is used to inform the user. Also the
feedback message now starts with a newline, making it easier to read by
separating it from the command that caused the issue.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Mon, 23 Mar 2026 13:18:41 -0400 |
| parents | 8bda74ee7070 |
| children |
line wrap: on
line source
""" This implements the full-text indexer over two RDBMS tables. The first is a mapping of words to occurance IDs. The second maps the IDs to (Class, propname, itemid) instances. """ import re from roundup.backends.indexer_common import Indexer as IndexerBase from roundup.anypy.strings import us2u, u2s class Indexer(IndexerBase): def __init__(self, db): IndexerBase.__init__(self, db) self.db = db self.reindex = 0 def close(self): """close the indexing database""" # just nuke the circular reference self.db = None def save_index(self): """Save the changes to the index.""" # not necessary - the RDBMS connection will handle this for us pass def force_reindex(self): """Force a reindexing of the database. This essentially empties the tables ids and index and sets a flag so that the databases are reindexed""" self.reindex = 1 def should_reindex(self): """returns True if the indexes need to be rebuilt""" return self.reindex def add_text(self, identifier, text, mime_type='text/plain'): """ "identifier" is (classname, itemid, property) """ if mime_type != 'text/plain': return # Ensure all elements of the identifier are strings 'cos the itemid # column is varchar even if item ids may be numbers elsewhere in the # code. ugh. 
identifier = tuple(map(str, identifier)) # first, find the id of the (classname, itemid, property) a = self.db.arg sql = 'select _textid from __textids where _class=%s and '\ '_itemid=%s and _prop=%s' % (a, a, a) self.db.cursor.execute(sql, identifier) r = self.db.cursor.fetchone() if not r: # not previously indexed id = self.db.newid('__textids') sql = 'insert into __textids (_textid, _class, _itemid, _prop)'\ ' values (%s, %s, %s, %s)' % (a, a, a, a) self.db.cursor.execute(sql, (id, ) + identifier) else: id = int(r[0]) # clear out any existing indexed values sql = 'delete from __words where _textid=%s' % a self.db.cursor.execute(sql, (id, )) # ok, find all the unique words in the text text = us2u(text, "replace") text = text.upper() wordlist = [u2s(w) for w in re.findall(r'(?u)\b\w{%d,%d}\b' % (self.minlength, self.maxlength), text)] words = set() for word in wordlist: if self.is_stopword(word): continue words.add(word) # for each word, add an entry in the db sql = 'insert into __words (_word, _textid) values (%s, %s)' % (a, a) words = [(word, id) for word in words] self.db.cursor.executemany(sql, words) def find(self, wordlist): """look up all the words in the wordlist. 
If none are found return an empty dictionary * more rules here """ if not wordlist: return [] cap_wl = [word.upper() for word in wordlist if self.minlength <= len(word) <= self.maxlength] clean_wl = [word for word in cap_wl if not self.is_stopword(word)] if not clean_wl: return [] if self.db.implements_intersect: # simple AND search sql = 'select distinct(_textid) from __words where _word=%s' % ( self.db.arg) sql = '\nINTERSECT\n'.join([sql]*len(clean_wl)) self.db.cursor.execute(sql, tuple(clean_wl)) r = self.db.cursor.fetchall() if not r: return [] a = ','.join([self.db.arg] * len(r)) sql = 'select _class, _itemid, _prop from __textids '\ 'where _textid in (%s)' % a self.db.cursor.execute(sql, tuple([int(row[0]) for row in r])) else: # A more complex version for MySQL since it doesn't # implement INTERSECT # Construct SQL statement to join __words table to itself # multiple times. sql = """select distinct(__words1._textid) from __words as __words1 %s where __words1._word=%s %s""" join_tmpl = ' left join __words as __words%d using (_textid) \n' match_tmpl = ' and __words%d._word=%s \n' join_list = [] match_list = [] for n in range(len(clean_wl) - 1): join_list.append(join_tmpl % (n + 2)) match_list.append(match_tmpl % (n + 2, self.db.arg)) sql = sql % (' '.join(join_list), self.db.arg, ' '.join(match_list)) self.db.cursor.execute(sql, clean_wl) r = [x[0] for x in self.db.cursor.fetchall()] if not r: return [] a = ','.join([self.db.arg] * len(r)) sql = 'select _class, _itemid, _prop from __textids '\ 'where _textid in (%s)' % a self.db.cursor.execute(sql, tuple(map(int, r))) return self.db.cursor.fetchall()
