Mercurial > p > roundup > code
changeset 2093:3f6024ab2c7a
That's the last of the RDBMS migration steps done! Yay!
Note that the code currently has some unit testing issues:
- Metakit needs some attention in a couple of areas
- RDBMS backends are having trouble ordering their journal entries
correctly. I'm going to be migrating them to use TIMESTAMP for the date
column, but that's not necessarily going to fix things, as MySQL and
PostgreSQL both appear to have only one-second granularity. SQLite will
ignore the data type as usual ;)
Next up is the datatype-ification of the RDBMS backends. Part of that will
involve the migration to numeric IDs, which will also be done in the *dbm
backends (already done in metakit). The ID exposed *above* the hyperdb
will be String, since so many things assume a string ID now.
| author | Richard Jones <richard@users.sourceforge.net> |
|---|---|
| date | Sun, 21 Mar 2004 23:39:08 +0000 |
| parents | 0e6db4462896 |
| children | 85d8ebe34c40 |
| files | TODO.txt roundup/admin.py roundup/backends/back_metakit.py roundup/backends/back_mysql.py roundup/backends/back_postgresql.py roundup/backends/back_sqlite.py roundup/backends/indexer_rdbms.py roundup/backends/rdbms_common.py |
| diffstat | 8 files changed, 159 insertions(+), 19 deletions(-) [+] |
line wrap: on
line diff
--- a/TODO.txt Fri Mar 19 05:27:55 2004 +0000 +++ b/TODO.txt Sun Mar 21 23:39:08 2004 +0000 @@ -1,10 +1,9 @@ This file has been re-purposed to contain specifically the items that need doing before the next release: -- indexing in RDBMSes -- add tests for group-by-multilink so I finally implement it for the RDBMSes -- s/getnode/getitem in backends (and s/Node/Item) - have rdbms backends look up the journal for actor if it's not set -- ensure index creation is triggered by the version 1->2 update +- migrate rdbms backends to use typed columns +- migrate to numeric ID values (fixes bug 817217) -- migrate to numeric ID values (fixes bug 817217) +- ensure index creation is triggered by the version 1->2 update + (and other upgrade tests)
--- a/roundup/admin.py Fri Mar 19 05:27:55 2004 +0000 +++ b/roundup/admin.py Sun Mar 21 23:39:08 2004 +0000 @@ -16,7 +16,7 @@ # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE, # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. # -# $Id: admin.py,v 1.62 2004-02-11 23:55:08 richard Exp $ +# $Id: admin.py,v 1.63 2004-03-21 23:39:08 richard Exp $ '''Administration commands for maintaining Roundup trackers. ''' @@ -748,7 +748,7 @@ # convert types for propname, value in props.items(): try: - props[key] = hyperdb.rawToHyperdb(self.db, cl, None, + props[propname] = hyperdb.rawToHyperdb(self.db, cl, None, propname, value) except hyperdb.HyperdbValueError, message: raise UsageError, message
--- a/roundup/backends/back_metakit.py Fri Mar 19 05:27:55 2004 +0000 +++ b/roundup/backends/back_metakit.py Sun Mar 21 23:39:08 2004 +0000 @@ -1,4 +1,4 @@ -# $Id: back_metakit.py,v 1.65 2004-03-19 05:27:55 richard Exp $ +# $Id: back_metakit.py,v 1.66 2004-03-21 23:39:08 richard Exp $ '''Metakit backend for Roundup, originally by Gordon McMillan. Known Current Bugs: @@ -1873,12 +1873,12 @@ ids[oldpos].ignore = 1 self.changed = 1 pos = ids.append(tblid=tblid,nodeid=nodeid,propid=propid) - + wordlist = re.findall(r'\b\w{2,25}\b', text.upper()) words = {} for word in wordlist: - if not self.disallows.has_key(word): - words[word] = 1 + if not self.disallows.has_key(word): + words[word] = 1 words = words.keys() index = self.db.view('index').ordered(1)
--- a/roundup/backends/back_mysql.py Fri Mar 19 05:27:55 2004 +0000 +++ b/roundup/backends/back_mysql.py Sun Mar 21 23:39:08 2004 +0000 @@ -124,14 +124,29 @@ self.create_version_2_tables() def create_version_2_tables(self): + # OTK store self.cursor.execute('CREATE TABLE otks (otk_key VARCHAR(255), ' - 'otk_value VARCHAR(255), otk_time FLOAT(20))') + 'otk_value VARCHAR(255), otk_time FLOAT(20)) ' + 'TYPE=%s'%self.mysql_backend) self.cursor.execute('CREATE INDEX otks_key_idx ON otks(otk_key)') + + # Sessions store self.cursor.execute('CREATE TABLE sessions (session_key VARCHAR(255), ' - 'session_time FLOAT(20), session_value VARCHAR(255))') + 'session_time FLOAT(20), session_value VARCHAR(255)) ' + 'TYPE=%s'%self.mysql_backend) self.cursor.execute('CREATE INDEX sessions_key_idx ON ' 'sessions(session_key)') + # full-text indexing store + self.cursor.execute('CREATE TABLE _textids (_class VARCHAR(255), ' + '_itemid VARCHAR(255), _prop VARCHAR(255), _textid INT) ' + 'TYPE=%s'%self.mysql_backend) + self.cursor.execute('CREATE TABLE _words (_word VARCHAR(30), ' + '_textid INT) TYPE=%s'%self.mysql_backend) + self.cursor.execute('CREATE INDEX words_word_ids ON _words(_word)') + sql = 'insert into ids (name, num) values (%s,%s)'%(self.arg, self.arg) + self.cursor.execute(sql, ('_textids', 1)) + def add_actor_column(self): # update existing tables to have the new actor column tables = self.database_schema['tables']
--- a/roundup/backends/back_postgresql.py Fri Mar 19 05:27:55 2004 +0000 +++ b/roundup/backends/back_postgresql.py Sun Mar 21 23:39:08 2004 +0000 @@ -113,14 +113,26 @@ self.create_version_2_tables() def create_version_2_tables(self): + # OTK store self.cursor.execute('CREATE TABLE otks (otk_key VARCHAR(255), ' 'otk_value VARCHAR(255), otk_time FLOAT(20))') self.cursor.execute('CREATE INDEX otks_key_idx ON otks(otk_key)') + + # Sessions store self.cursor.execute('CREATE TABLE sessions (session_key VARCHAR(255), ' 'session_time FLOAT(20), session_value VARCHAR(255))') self.cursor.execute('CREATE INDEX sessions_key_idx ON ' 'sessions(session_key)') + # full-text indexing store + self.cursor.execute('CREATE TABLE _textids (_class VARCHAR(255), ' + '_itemid VARCHAR(255), _prop VARCHAR(255), _textid INT4) ') + self.cursor.execute('CREATE TABLE _words (_word VARCHAR(30), ' + '_textid INT4)') + self.cursor.execute('CREATE INDEX words_word_ids ON _words(_word)') + sql = 'insert into ids (name, num) values (%s,%s)'%(self.arg, self.arg) + self.cursor.execute(sql, ('_textids', 1)) + def add_actor_column(self): # update existing tables to have the new actor column tables = self.database_schema['tables']
--- a/roundup/backends/back_sqlite.py Fri Mar 19 05:27:55 2004 +0000 +++ b/roundup/backends/back_sqlite.py Sun Mar 21 23:39:08 2004 +0000 @@ -1,4 +1,4 @@ -# $Id: back_sqlite.py,v 1.17 2004-03-18 01:58:45 richard Exp $ +# $Id: back_sqlite.py,v 1.18 2004-03-21 23:39:08 richard Exp $ '''Implements a backend for SQLite. See https://pysqlite.sourceforge.net/ for pysqlite info @@ -65,6 +65,15 @@ self.cursor.execute('create index sessions_key_idx on ' 'sessions(session_key)') + # full-text indexing store + self.cursor.execute('CREATE TABLE _textids (_class varchar, ' + '_itemid varchar, _prop varchar, _textid integer) ') + self.cursor.execute('CREATE TABLE _words (_word varchar, ' + '_textid integer)') + self.cursor.execute('CREATE INDEX words_word_ids ON _words(_word)') + sql = 'insert into ids (name, num) values (%s,%s)'%(self.arg, self.arg) + self.cursor.execute(sql, ('_textids', 1)) + def add_actor_column(self): # update existing tables to have the new actor column tables = self.database_schema['tables']
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roundup/backends/indexer_rdbms.py Sun Mar 21 23:39:08 2004 +0000 @@ -0,0 +1,104 @@ +''' This implements the full-text indexer over two RDBMS tables. The first +is a mapping of words to occurance IDs. The second maps the IDs to (Class, +propname, itemid) instances. +''' +import re + +from indexer_dbm import Indexer + +class Indexer(Indexer): + disallows = {'THE':1, 'THIS':1, 'ZZZ':1, 'THAT':1, 'WITH':1} + def __init__(self, db): + self.db = db + self.reindex = 0 + + def close(self): + '''close the indexing database''' + # just nuke the circular reference + self.db = None + + def force_reindex(self): + '''Force a reindexing of the database. This essentially + empties the tables ids and index and sets a flag so + that the databases are reindexed''' + self.reindex = 1 + + def should_reindex(self): + '''returns True if the indexes need to be rebuilt''' + return self.reindex + + def add_text(self, identifier, text, mime_type='text/plain'): + ''' "identifier" is (classname, itemid, property) ''' + if mime_type != 'text/plain': + return + + # first, find the id of the (classname, itemid, property) + a = self.db.arg + sql = 'select _textid from _textids where _class=%s and '\ + '_itemid=%s and _prop=%s'%(a, a, a) + self.db.cursor.execute(sql, identifier) + r = self.db.cursor.fetchone() + if not r: + id = self.db.newid('_textids') + sql = 'insert into _textids (_textid, _class, _itemid, _prop)'\ + ' values (%s, %s, %s, %s)'%(a, a, a, a) + self.db.cursor.execute(sql, (id, ) + identifier) + else: + id = int(r[0]) + # clear out any existing indexed values + sql = 'delete from _words where _textid=%s'%a + self.db.cursor.execute(sql, (id, )) + + # ok, find all the words in the text + wordlist = re.findall(r'\b\w{2,25}\b', str(text).upper()) + words = {} + for word in wordlist: + if not self.disallows.has_key(word): + words[word] = 1 + words = words.keys() + + # for each word, add an entry in the db + for word in words: + # don't 
dupe + sql = 'select * from _words where _word=%s and _textid=%s'%(a, a) + self.db.cursor.execute(sql, (word, id)) + if self.db.cursor.fetchall(): + continue + sql = 'insert into _words (_word, _textid) values (%s, %s)'%(a, a) + self.db.cursor.execute(sql, (word, id)) + + def find(self, wordlist): + '''look up all the words in the wordlist. + If none are found return an empty dictionary + * more rules here + ''' + l = [word.upper() for word in wordlist if 26 > len(word) > 2] + + a = ','.join([self.db.arg] * len(l)) + sql = 'select distinct(_textid) from _words where _word in (%s)'%a + self.db.cursor.execute(sql, tuple(l)) + r = self.db.cursor.fetchall() + if not r: + return {} + a = ','.join([self.db.arg] * len(r)) + sql = 'select _class, _itemid, _prop from _textids '\ + 'where _textid in (%s)'%a + self.db.cursor.execute(sql, tuple([int(id) for (id,) in r])) + # self.search_index has the results as {some id: identifier} ... + # sigh + r = {} + k = 0 + for c,n,p in self.db.cursor.fetchall(): + key = (str(c), str(n), str(p)) + r[k] = key + k += 1 + return r + + def save_index(self): + # the normal RDBMS backend transaction mechanisms will handle this + pass + + def rollback(self): + # the normal RDBMS backend transaction mechanisms will handle this + pass +
--- a/roundup/backends/rdbms_common.py Fri Mar 19 05:27:55 2004 +0000 +++ b/roundup/backends/rdbms_common.py Sun Mar 21 23:39:08 2004 +0000 @@ -1,4 +1,4 @@ -# $Id: rdbms_common.py,v 1.82 2004-03-19 04:47:59 richard Exp $ +# $Id: rdbms_common.py,v 1.83 2004-03-21 23:39:08 richard Exp $ ''' Relational database (SQL) backend common code. Basics: @@ -39,7 +39,7 @@ # support from blobfiles import FileStorage -from indexer_dbm import Indexer +from indexer_rdbms import Indexer from sessions_rdbms import Sessions, OneTimeKeys from roundup.date import Range @@ -59,7 +59,7 @@ self.config, self.journaltag = config, journaltag self.dir = config.DATABASE self.classes = {} - self.indexer = Indexer(self.dir) + self.indexer = Indexer(self) self.security = security.Security(self) # additional transaction support for external files and the like @@ -177,7 +177,7 @@ self.reindex() # commit - self.conn.commit() + self.sql_commit() # update this number when we need to make changes to the SQL structure # of the backen database @@ -591,7 +591,7 @@ if __debug__: print >>hyperdb.DEBUG, 'newid', (self, sql, classname) self.cursor.execute(sql, (classname, )) - newid = self.cursor.fetchone()[0] + newid = int(self.cursor.fetchone()[0]) # update the counter sql = 'update ids set num=%s where name=%s'%(self.arg, self.arg) @@ -1066,6 +1066,7 @@ def close(self): ''' Close off the connection. ''' + self.indexer.close() self.sql_close() #
