Mercurial > p > roundup > code

--- a/TODO.txt	Fri Mar 19 05:27:55 2004 +0000
+++ b/TODO.txt	Sun Mar 21 23:39:08 2004 +0000
@@ -1,10 +1,9 @@
 This file has been re-purposed to contain specifically the items that need
 doing before the next release:

-- indexing in RDBMSes
-- add tests for group-by-multilink so I finally implement it for the RDBMSes
-- s/getnode/getitem in backends (and s/Node/Item)
 - have rdbms backends look up the journal for actor if it's not set
-- ensure index creation is triggered by the version 1->2 update
+- migrate rdbms backends to use typed columns
+- migrate to numeric ID values (fixes bug 817217)

-- migrate to numeric ID values (fixes bug 817217)
+- ensure index creation is triggered by the version 1->2 update
+  (and other upgrade tests)
--- a/roundup/admin.py	Fri Mar 19 05:27:55 2004 +0000
+++ b/roundup/admin.py	Sun Mar 21 23:39:08 2004 +0000
@@ -16,7 +16,7 @@
 # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
 # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 #
-# $Id: admin.py,v 1.62 2004-02-11 23:55:08 richard Exp $
+# $Id: admin.py,v 1.63 2004-03-21 23:39:08 richard Exp $

 '''Administration commands for maintaining Roundup trackers.
 '''
@@ -748,7 +748,7 @@
         # convert types
         for propname, value in props.items():
             try:
-                props[key] = hyperdb.rawToHyperdb(self.db, cl, None,
+                props[propname] = hyperdb.rawToHyperdb(self.db, cl, None,
                     propname, value)
             except hyperdb.HyperdbValueError, message:
                 raise UsageError, message
--- a/roundup/backends/back_metakit.py	Fri Mar 19 05:27:55 2004 +0000
+++ b/roundup/backends/back_metakit.py	Sun Mar 21 23:39:08 2004 +0000
@@ -1,4 +1,4 @@
-# $Id: back_metakit.py,v 1.65 2004-03-19 05:27:55 richard Exp $
+# $Id: back_metakit.py,v 1.66 2004-03-21 23:39:08 richard Exp $
 '''Metakit backend for Roundup, originally by Gordon McMillan.

 Known Current Bugs:
@@ -1873,12 +1873,12 @@
             ids[oldpos].ignore = 1
             self.changed = 1
         pos = ids.append(tblid=tblid,nodeid=nodeid,propid=propid)
-
+
         wordlist = re.findall(r'\b\w{2,25}\b', text.upper())
         words = {}
         for word in wordlist:
-	    if not self.disallows.has_key(word):
-            	words[word] = 1
+            if not self.disallows.has_key(word):
+                words[word] = 1
         words = words.keys()

         index = self.db.view('index').ordered(1)
--- a/roundup/backends/back_mysql.py	Fri Mar 19 05:27:55 2004 +0000
+++ b/roundup/backends/back_mysql.py	Sun Mar 21 23:39:08 2004 +0000
@@ -124,14 +124,29 @@
             self.create_version_2_tables()

     def create_version_2_tables(self):
+        # OTK store
         self.cursor.execute('CREATE TABLE otks (otk_key VARCHAR(255), '
-            'otk_value VARCHAR(255), otk_time FLOAT(20))')
+            'otk_value VARCHAR(255), otk_time FLOAT(20)) '
+            'TYPE=%s'%self.mysql_backend)
         self.cursor.execute('CREATE INDEX otks_key_idx ON otks(otk_key)')
+
+        # Sessions store
         self.cursor.execute('CREATE TABLE sessions (session_key VARCHAR(255), '
-            'session_time FLOAT(20), session_value VARCHAR(255))')
+            'session_time FLOAT(20), session_value VARCHAR(255)) '
+            'TYPE=%s'%self.mysql_backend)
         self.cursor.execute('CREATE INDEX sessions_key_idx ON '
             'sessions(session_key)')

+        # full-text indexing store
+        self.cursor.execute('CREATE TABLE _textids (_class VARCHAR(255), '
+            '_itemid VARCHAR(255), _prop VARCHAR(255), _textid INT) '
+            'TYPE=%s'%self.mysql_backend)
+        self.cursor.execute('CREATE TABLE _words (_word VARCHAR(30), '
+            '_textid INT) TYPE=%s'%self.mysql_backend)
+        self.cursor.execute('CREATE INDEX words_word_ids ON _words(_word)')
+        sql = 'insert into ids (name, num) values (%s,%s)'%(self.arg, self.arg)
+        self.cursor.execute(sql, ('_textids', 1))
+
     def add_actor_column(self):
         # update existing tables to have the new actor column
         tables = self.database_schema['tables']
--- a/roundup/backends/back_postgresql.py	Fri Mar 19 05:27:55 2004 +0000
+++ b/roundup/backends/back_postgresql.py	Sun Mar 21 23:39:08 2004 +0000
@@ -113,14 +113,26 @@
             self.create_version_2_tables()

     def create_version_2_tables(self):
+        # OTK store
         self.cursor.execute('CREATE TABLE otks (otk_key VARCHAR(255), '
             'otk_value VARCHAR(255), otk_time FLOAT(20))')
         self.cursor.execute('CREATE INDEX otks_key_idx ON otks(otk_key)')
+
+        # Sessions store
         self.cursor.execute('CREATE TABLE sessions (session_key VARCHAR(255), '
             'session_time FLOAT(20), session_value VARCHAR(255))')
         self.cursor.execute('CREATE INDEX sessions_key_idx ON '
             'sessions(session_key)')

+        # full-text indexing store
+        self.cursor.execute('CREATE TABLE _textids (_class VARCHAR(255), '
+            '_itemid VARCHAR(255), _prop VARCHAR(255), _textid INT4) ')
+        self.cursor.execute('CREATE TABLE _words (_word VARCHAR(30), '
+            '_textid INT4)')
+        self.cursor.execute('CREATE INDEX words_word_ids ON _words(_word)')
+        sql = 'insert into ids (name, num) values (%s,%s)'%(self.arg, self.arg)
+        self.cursor.execute(sql, ('_textids', 1))
+
     def add_actor_column(self):
         # update existing tables to have the new actor column
         tables = self.database_schema['tables']
--- a/roundup/backends/back_sqlite.py	Fri Mar 19 05:27:55 2004 +0000
+++ b/roundup/backends/back_sqlite.py	Sun Mar 21 23:39:08 2004 +0000
@@ -1,4 +1,4 @@
-# $Id: back_sqlite.py,v 1.17 2004-03-18 01:58:45 richard Exp $
+# $Id: back_sqlite.py,v 1.18 2004-03-21 23:39:08 richard Exp $
 '''Implements a backend for SQLite.

 See https://pysqlite.sourceforge.net/ for pysqlite info
@@ -65,6 +65,15 @@
         self.cursor.execute('create index sessions_key_idx on '
                 'sessions(session_key)')

+        # full-text indexing store
+        self.cursor.execute('CREATE TABLE _textids (_class varchar, '
+            '_itemid varchar, _prop varchar, _textid integer) ')
+        self.cursor.execute('CREATE TABLE _words (_word varchar, '
+            '_textid integer)')
+        self.cursor.execute('CREATE INDEX words_word_ids ON _words(_word)')
+        sql = 'insert into ids (name, num) values (%s,%s)'%(self.arg, self.arg)
+        self.cursor.execute(sql, ('_textids', 1))
+
     def add_actor_column(self):
         # update existing tables to have the new actor column
         tables = self.database_schema['tables']
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/roundup/backends/indexer_rdbms.py	Sun Mar 21 23:39:08 2004 +0000
@@ -0,0 +1,104 @@
+''' This implements the full-text indexer over two RDBMS tables. The first
+is a mapping of words to occurrence IDs. The second maps the IDs to (class,
+itemid, propname) instances.
+'''
+import re
+
+from indexer_dbm import Indexer
+
+class Indexer(Indexer):
+    disallows = {'THE':1, 'THIS':1, 'ZZZ':1, 'THAT':1, 'WITH':1}
+    def __init__(self, db):
+        self.db = db
+        self.reindex = 0
+
+    def close(self):
+        '''close the indexing database'''
+        # just nuke the circular reference
+        self.db = None
+
+    def force_reindex(self):
+        '''Force a reindexing of the database.  This only sets the
+        flag reported by should_reindex() so that the indexes get
+        rebuilt; no tables are emptied here'''
+        self.reindex = 1
+
+    def should_reindex(self):
+        '''returns True if the indexes need to be rebuilt'''
+        return self.reindex
+
+    def add_text(self, identifier, text, mime_type='text/plain'):
+        ''' "identifier" is  (classname, itemid, property) '''
+        if mime_type != 'text/plain':
+            return
+
+        # first, find the id of the (classname, itemid, property)
+        a = self.db.arg
+        sql = 'select _textid from _textids where _class=%s and '\
+            '_itemid=%s and _prop=%s'%(a, a, a)
+        self.db.cursor.execute(sql, identifier)
+        r = self.db.cursor.fetchone()
+        if not r:
+            id = self.db.newid('_textids')
+            sql = 'insert into _textids (_textid, _class, _itemid, _prop)'\
+                ' values (%s, %s, %s, %s)'%(a, a, a, a)
+            self.db.cursor.execute(sql, (id, ) + identifier)
+        else:
+            id = int(r[0])
+            # clear out any existing indexed values
+            sql = 'delete from _words where _textid=%s'%a
+            self.db.cursor.execute(sql, (id, ))
+
+        # ok, find all the words in the text
+        wordlist = re.findall(r'\b\w{2,25}\b', str(text).upper())
+        words = {}
+        for word in wordlist:
+            if not self.disallows.has_key(word):
+                words[word] = 1
+        words = words.keys()
+
+        # for each word, add an entry in the db
+        for word in words:
+            # don't dupe
+            sql = 'select * from _words where _word=%s and _textid=%s'%(a, a)
+            self.db.cursor.execute(sql, (word, id))
+            if self.db.cursor.fetchall():
+                continue
+            sql = 'insert into _words (_word, _textid) values (%s, %s)'%(a, a)
+            self.db.cursor.execute(sql, (word, id))
+
+    def find(self, wordlist):
+        '''look up all the words in the wordlist.
+        If none are found return an empty dictionary
+        * words are upper-cased; only those 3 to 25 characters long are used
+        '''
+        l = [word.upper() for word in wordlist if 26 > len(word) > 2]
+
+        a = ','.join([self.db.arg] * len(l))
+        sql = 'select distinct(_textid) from _words where _word in (%s)'%a
+        self.db.cursor.execute(sql, tuple(l))
+        r = self.db.cursor.fetchall()
+        if not r:
+            return {}
+        a = ','.join([self.db.arg] * len(r))
+        sql = 'select _class, _itemid, _prop from _textids '\
+            'where _textid in (%s)'%a
+        self.db.cursor.execute(sql, tuple([int(id) for (id,) in r]))
+        # self.search_index has the results as {some id: identifier} ...
+        # sigh
+        r = {}
+        k = 0
+        for c,n,p in self.db.cursor.fetchall():
+            key = (str(c), str(n), str(p))
+            r[k] = key
+            k += 1
+        return r
+
+    def save_index(self):
+        # the normal RDBMS backend transaction mechanisms will handle this
+        pass
+
+    def rollback(self):
+        # the normal RDBMS backend transaction mechanisms will handle this
+        pass
+
--- a/roundup/backends/rdbms_common.py	Fri Mar 19 05:27:55 2004 +0000
+++ b/roundup/backends/rdbms_common.py	Sun Mar 21 23:39:08 2004 +0000
@@ -1,4 +1,4 @@
-# $Id: rdbms_common.py,v 1.82 2004-03-19 04:47:59 richard Exp $
+# $Id: rdbms_common.py,v 1.83 2004-03-21 23:39:08 richard Exp $
 ''' Relational database (SQL) backend common code.

 Basics:
@@ -39,7 +39,7 @@

 # support
 from blobfiles import FileStorage
-from indexer_dbm import Indexer
+from indexer_rdbms import Indexer
 from sessions_rdbms import Sessions, OneTimeKeys
 from roundup.date import Range

@@ -59,7 +59,7 @@
         self.config, self.journaltag = config, journaltag
         self.dir = config.DATABASE
         self.classes = {}
-        self.indexer = Indexer(self.dir)
+        self.indexer = Indexer(self)
         self.security = security.Security(self)

         # additional transaction support for external files and the like
@@ -177,7 +177,7 @@
             self.reindex()

         # commit
-        self.conn.commit()
+        self.sql_commit()

     # update this number when we need to make changes to the SQL structure
     # of the backen database
@@ -591,7 +591,7 @@
         if __debug__:
             print >>hyperdb.DEBUG, 'newid', (self, sql, classname)
         self.cursor.execute(sql, (classname, ))
-        newid = self.cursor.fetchone()[0]
+        newid = int(self.cursor.fetchone()[0])

         # update the counter
         sql = 'update ids set num=%s where name=%s'%(self.arg, self.arg)
@@ -1066,6 +1066,7 @@
     def close(self):
         ''' Close off the connection.
         '''
+        self.indexer.close()
         self.sql_close()

 #