changeset 3016:224c7c0b9708

First checkin of tsearch2 "backend". Miscellaneous notes: * We override the testTransactions method, as it relies on FileStorage for its transaction testing. * importing/exporting doesn't work right yet. * Filtering of text/plain mime-types is an ugly hack right now.
author Johannes Gijsbers <jlgijsbers@users.sourceforge.net>
date Thu, 16 Dec 2004 22:22:55 +0000
parents 6dbe3798a4c4
children f1cba8342186
files roundup/backends/back_tsearch2.py roundup/backends/rdbms_common.py roundup/backends/tsearch2_setup.py test/db_test_base.py test/test_tsearch2.py
diffstat 5 files changed, 1063 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/roundup/backends/back_tsearch2.py	Thu Dec 16 22:22:55 2004 +0000
@@ -0,0 +1,215 @@
+import re
+
+import psycopg
+
+from roundup import hyperdb
+from roundup.backends import back_postgresql, tsearch2_setup, indexer_rdbms
+from roundup.backends.back_postgresql import db_create, db_nuke, db_command
+from roundup.backends.back_postgresql import pg_command, db_exists, Class, IssueClass, FileClass
+
+# XXX: Should probably be on the Class class.
+def _indexedProps(spec):
+    """Return the names of the properties of 'spec' that should be indexed.
+
+    A property qualifies when it is a hyperdb.String whose 'indexme'
+    attribute is true.
+    """
+    indexed = []
+    for name, propclass in spec.getprops().items():
+        if not isinstance(propclass, hyperdb.String):
+            continue
+        if propclass.indexme:
+            indexed.append(name)
+    return indexed
+
+def _getQueryDict(spec):
+    """Build the substitution dictionary used by the tsearch2 SQL templates.
+
+    Keys: 'classname', 'indexedColumns' (the indexed property names with a
+    leading underscore), 'tablename' and 'triggername'.
+    """
+    tablename = "_%s" % spec.classname
+    columns = ['_' + prop for prop in _indexedProps(spec)]
+    return {'classname': spec.classname,
+            'indexedColumns': columns,
+            'tablename': tablename,
+            'triggername': "%s_tsvectorupdate" % tablename}
+
+def _isLink(propclass):
+    """Tell whether 'propclass' is a hyperdb Link or Multilink property."""
+    link_types = (hyperdb.Link, hyperdb.Multilink)
+    return isinstance(propclass, link_types)
+
+class Database(back_postgresql.Database):
+    """PostgreSQL database that keeps a tsearch2 full-text index per class.
+
+    Every class table gets an extra 'idxFTI' tsvector column and a GiST
+    index on it. Classes with indexed String properties also get a trigger
+    that keeps the column up to date.
+    """
+
+    def __init__(self, config, journaltag=None):
+        back_postgresql.Database.__init__(self, config, journaltag)
+        # Searches go through the tsearch2-backed Indexer of this module.
+        self.indexer = Indexer(self)
+
+    def create_version_2_tables(self):
+        """Create the base tables, then install the tsearch2 SQL objects."""
+        back_postgresql.Database.create_version_2_tables(self)
+        tsearch2_setup.setup(self.cursor)
+
+    def create_class_table_indexes(self, spec):
+        """Create the regular indexes plus the GiST index and trigger."""
+        back_postgresql.Database.create_class_table_indexes(self, spec)
+        self.cursor.execute("""CREATE INDEX _%(classname)s_idxFTI_idx
+                               ON %(tablename)s USING gist(idxFTI);""" %
+                            _getQueryDict(spec))
+
+        self.create_tsearch2_trigger(spec)
+
+    def create_tsearch2_trigger(self, spec):
+        """Fill idxFTI for the rows of 'spec' and install its update trigger.
+
+        Does nothing when the class has no indexed String properties.
+        """
+        d = _getQueryDict(spec)
+        if not d['indexedColumns']:
+            return
+
+        # Populate idxFTI for rows that already exist. Each column is
+        # wrapped in coalesce() because "NULL || text" is NULL in SQL, so
+        # a single unset property would otherwise leave the whole row
+        # without an index entry.
+        d['joined'] = " || ' ' || ".join(
+            ["coalesce(%s, '')" % column for column in d['indexedColumns']])
+        query = """UPDATE %(tablename)s
+                   SET idxFTI = to_tsvector('default', %(joined)s)""" % d
+        self.cursor.execute(query)
+
+        # Keep idxFTI current on every later insert or update.
+        d['joined'] = ", ".join(d['indexedColumns'])
+        query = """CREATE TRIGGER %(triggername)s
+                   BEFORE UPDATE OR INSERT ON %(tablename)s
+                   FOR EACH ROW EXECUTE PROCEDURE
+                   tsearch2(idxFTI, %(joined)s);""" % d
+        self.cursor.execute(query)
+
+    def drop_tsearch2_trigger(self, spec):
+        """Drop the tsearch2 trigger of 'spec' if it currently exists."""
+        # Check whether the trigger exists before trying to drop it.
+        query_dict = _getQueryDict(spec)
+        self.sql("""SELECT tgname FROM pg_catalog.pg_trigger
+                    WHERE tgname = '%(triggername)s'""" % query_dict)
+        if self.cursor.fetchall():
+            self.sql("""DROP TRIGGER %(triggername)s ON %(tablename)s""" %
+                     query_dict)
+
+    def update_class(self, spec, old_spec, force=0):
+        """Update the class table, then rebuild the tsearch2 trigger.
+
+        Returns whatever the base class update_class() returned.
+        """
+        result = back_postgresql.Database.update_class(self, spec, old_spec, force)
+
+        # Drop trigger...
+        self.drop_tsearch2_trigger(spec)
+
+        # and recreate if necessary.
+        self.create_tsearch2_trigger(spec)
+
+        return result
+
+    def determine_all_columns(self, spec):
+        """Return the base class columns plus the 'idxFTI' tsvector column."""
+        cols, mls = back_postgresql.Database.determine_all_columns(self, spec)
+        cols.append(('idxFTI', 'tsvector'))
+        return cols, mls
+        
+class Indexer:
+    """Full-text indexer backed by the tsearch2 'idxFTI' columns.
+
+    The index is maintained inside PostgreSQL by the triggers that
+    Database.create_tsearch2_trigger() installs, so the index-maintenance
+    methods below are no-ops.
+    """
+
+    def __init__(self, db):
+        self.db = db
+
+    def force_reindex(self):
+        pass
+
+    def should_reindex(self):
+        pass
+
+    def save_index(self):
+        pass
+
+    def add_text(self, identifier, text, mime_type=None):
+        pass
+
+    def close(self):
+        pass
+
+    def search(self, search_terms, klass, ignore={},
+               dre=re.compile(r'([^\d]+)(\d+)')):
+        '''Display search results looking for [search, terms] associated
+        with the hyperdb Class "klass". Ignore hits on {class: property}.
+
+        Returns a dictionary mapping each matching nodeid of "klass" to a
+        dictionary {link property name: [linked nodeids that matched]};
+        the inner dictionary is empty for nodes that matched directly.
+
+        "ignore" is accepted but not consulted by this implementation.
+        "dre" is a helper, not an argument.
+        '''
+        # do the index lookup
+        hits = self.find(search_terms, klass)
+        if not hits:
+            return {}
+
+        # map each linked classname to the property of klass that links to it
+        designator_propname = {}
+        for nm, propclass in klass.getprops().items():
+            if _isLink(propclass):
+                designator_propname[propclass.classname] = nm
+
+        # build a dictionary of nodes and their associated messages
+        # and files
+        nodeids = {}      # this is the answer
+        propspec = {}     # used to do the klass.find
+        for propname in designator_propname.values():
+            propspec[propname] = {}   # used as a set (value doesn't matter)
+
+        for classname, nodeid in hits:
+            # if it's a property on klass, it's easy
+            if classname == klass.classname:
+                if nodeid not in nodeids:
+                    nodeids[nodeid] = {}
+                continue
+
+            # make sure the class is a linked one, otherwise ignore
+            if classname not in designator_propname:
+                continue
+
+            # it's a linked class - set up to do the klass.find
+            linkprop = designator_propname[classname]   # eg, msg -> messages
+            propspec[linkprop][nodeid] = 1
+
+        # retain only the meaningful entries (iterate over a copy, since
+        # the dictionary is modified inside the loop)
+        for propname, idset in list(propspec.items()):
+            if not idset:
+                del propspec[propname]
+
+        # klass.find tells me the klass nodeids the linked nodes relate to
+        for resid in klass.find(**propspec):
+            resid = str(resid)
+            # Keep an entry recorded earlier for this node; the original
+            # code tested the builtin "id" here and so always reset it.
+            if resid not in nodeids:
+                nodeids[resid] = {}
+            node_dict = nodeids[resid]
+            # now figure out where it came from
+            for linkprop in propspec.keys():
+                for nodeid in klass.get(resid, linkprop):
+                    if nodeid in propspec[linkprop]:
+                        # OK, this node[propname] has a winner
+                        node_dict.setdefault(linkprop, []).append(nodeid)
+        return nodeids
+
+    def find(self, search_terms, klass):
+        '''Return (classname, nodeid) hits for "klass" and every class it
+        links to through a Link or Multilink property.
+
+        Returns None when "search_terms" is empty.
+        '''
+        if not search_terms:
+            return None
+
+        hits = self.tsearchQuery(klass.classname, search_terms)
+        for nm, propclass in klass.getprops().items():
+            if _isLink(propclass):
+                hits.extend(self.tsearchQuery(propclass.classname,
+                                              search_terms))
+        return hits
+
+    def tsearchQuery(self, classname, search_terms):
+        '''Run a tsearch2 query for all of "search_terms" against the table
+        of "classname" and return a list of (classname, nodeid) tuples.
+        '''
+        # Only the table name is interpolated; the user-supplied terms are
+        # passed as a bound parameter so they cannot break out of the SQL
+        # string literal. to_tsquery() still parses them as a tsquery.
+        query = """SELECT id FROM _%s
+                   WHERE idxFTI @@ to_tsquery('default', %%s)""" % classname
+        self.db.cursor.execute(query, (' & '.join(search_terms),))
+        klass = self.db.getclass(classname)
+        nodeids = [str(row[0]) for row in self.db.cursor.fetchall()]
+
+        # filter out files without text/plain mime type
+        # XXX: files without text/plain shouldn't be indexed at all, we
+        # should take care of this in the trigger
+        if 'type' in klass.getprops():
+            nodeids = [nodeid for nodeid in nodeids
+                       if klass.get(nodeid, 'type') == 'text/plain']
+
+        return [(classname, nodeid) for nodeid in nodeids]
+
+# XXX: we can't reuse hyperdb.FileClass for importing/exporting, so file
+# contents will end up in CSV exports for now. Not sure whether this is
+# truly a problem. If it is, we should write an importer/exporter that
+# converts from the database to the filesystem and vice versa
+class FileClass(Class):
+    '''Class whose nodes always carry an indexed "content" String property.
+    '''
+
+    default_mime_type = 'text/plain'
+
+    def __init__(self, db, classname, **properties):
+        '''The newly-created class automatically includes the "content"
+        property.
+        '''
+        properties['content'] = hyperdb.String(indexme='yes')
+        Class.__init__(self, db, classname, **properties)
+
+    def create(self, **propvalues):
+        '''Create a node, using default_mime_type when "type" is missing
+        or empty.
+        '''
+        mime_type = propvalues.get('type') or self.default_mime_type
+        propvalues['type'] = mime_type
+        return Class.create(self, **propvalues)
--- a/roundup/backends/rdbms_common.py	Wed Dec 15 00:00:52 2004 +0000
+++ b/roundup/backends/rdbms_common.py	Thu Dec 16 22:22:55 2004 +0000
@@ -1,4 +1,4 @@
-# $Id: rdbms_common.py,v 1.142 2004-12-03 22:11:06 richard Exp $
+# $Id: rdbms_common.py,v 1.143 2004-12-16 22:22:55 jlgijsbers Exp $
 ''' Relational database (SQL) backend common code.
 
 Basics:
@@ -454,14 +454,21 @@
 
         return 1
 
+    def determine_all_columns(self, spec):
+        """Return (cols, mls) for 'spec' including the internal columns.
+
+        Starts from determine_columns() and appends the 'id' and
+        '__retired__' column definitions.
+        """
+        cols, mls = self.determine_columns(spec.properties.items())
+        cols.extend([('id', 'INTEGER PRIMARY KEY'),
+                     ('__retired__', 'INTEGER DEFAULT 0')])
+        return cols, mls
+
     def create_class_table(self, spec):
         '''Create the class table for the given Class "spec". Creates the
         indexes too.'''
-        cols, mls = self.determine_columns(spec.properties.items())
-
-        # add on our special columns
-        cols.append(('id', 'INTEGER PRIMARY KEY'))
-        cols.append(('__retired__', 'INTEGER DEFAULT 0'))
+        cols, mls = self.determine_all_columns(spec)
 
         # create the base table
         scols = ','.join(['%s %s'%x for x in cols])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/roundup/backends/tsearch2_setup.py	Thu Dec 16 22:22:55 2004 +0000
@@ -0,0 +1,708 @@
+tsearch_sql = """
+-- Adjust this setting to control where the objects get CREATEd.
+SET search_path = public;
+
+--dict conf
+CREATE TABLE pg_ts_dict (
+	dict_name	text not null primary key,
+	dict_init	oid,
+	dict_initoption	text,
+	dict_lexize	oid not null,
+	dict_comment	text
+) with oids;
+
+--dict interface
+CREATE FUNCTION lexize(oid, text) 
+	returns _text
+	as '$libdir/tsearch2'
+	language 'C'
+	with (isstrict);
+
+CREATE FUNCTION lexize(text, text)
+        returns _text
+        as '$libdir/tsearch2', 'lexize_byname'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION lexize(text)
+        returns _text
+        as '$libdir/tsearch2', 'lexize_bycurrent'
+        language 'C'
+        with (isstrict);
+
+CREATE FUNCTION set_curdict(int)
+	returns void
+	as '$libdir/tsearch2'
+	language 'C'
+	with (isstrict);
+
+CREATE FUNCTION set_curdict(text)
+	returns void
+	as '$libdir/tsearch2', 'set_curdict_byname'
+	language 'C'
+	with (isstrict);
+
+--built-in dictionaries
+CREATE FUNCTION dex_init(text)
+	returns internal
+	as '$libdir/tsearch2' 
+	language 'C';
+
+CREATE FUNCTION dex_lexize(internal,internal,int4)
+	returns internal
+	as '$libdir/tsearch2'
+	language 'C'
+	with (isstrict);
+
+insert into pg_ts_dict select 
+	'simple', 
+	(select oid from pg_proc where proname='dex_init'),
+	null,
+	(select oid from pg_proc where proname='dex_lexize'),
+	'Simple example of dictionary.'
+;
+	 
+CREATE FUNCTION snb_en_init(text)
+	returns internal
+	as '$libdir/tsearch2' 
+	language 'C';
+
+CREATE FUNCTION snb_lexize(internal,internal,int4)
+	returns internal
+	as '$libdir/tsearch2'
+	language 'C'
+	with (isstrict);
+
+insert into pg_ts_dict select 
+	'en_stem', 
+	(select oid from pg_proc where proname='snb_en_init'),
+	'/usr/share/postgresql/contrib/english.stop',
+	(select oid from pg_proc where proname='snb_lexize'),
+	'English Stemmer. Snowball.'
+;
+
+CREATE FUNCTION snb_ru_init(text)
+	returns internal
+	as '$libdir/tsearch2' 
+	language 'C';
+
+insert into pg_ts_dict select 
+	'ru_stem', 
+	(select oid from pg_proc where proname='snb_ru_init'),
+	'/usr/share/postgresql/contrib/russian.stop',
+	(select oid from pg_proc where proname='snb_lexize'),
+	'Russian Stemmer. Snowball.'
+;
+	 
+CREATE FUNCTION spell_init(text)
+	returns internal
+	as '$libdir/tsearch2' 
+	language 'C';
+
+CREATE FUNCTION spell_lexize(internal,internal,int4)
+	returns internal
+	as '$libdir/tsearch2'
+	language 'C'
+	with (isstrict);
+
+insert into pg_ts_dict select 
+	'ispell_template', 
+	(select oid from pg_proc where proname='spell_init'),
+	null,
+	(select oid from pg_proc where proname='spell_lexize'),
+	'ISpell interface. Must have .dict and .aff files'
+;
+
+CREATE FUNCTION syn_init(text)
+	returns internal
+	as '$libdir/tsearch2' 
+	language 'C';
+
+CREATE FUNCTION syn_lexize(internal,internal,int4)
+	returns internal
+	as '$libdir/tsearch2'
+	language 'C'
+	with (isstrict);
+
+insert into pg_ts_dict select 
+	'synonym', 
+	(select oid from pg_proc where proname='syn_init'),
+	null,
+	(select oid from pg_proc where proname='syn_lexize'),
+	'Example of synonym dictionary'
+;
+
+--dict conf
+CREATE TABLE pg_ts_parser (
+	prs_name	text not null primary key,
+	prs_start	oid not null,
+	prs_nexttoken	oid not null,
+	prs_end		oid not null,
+	prs_headline	oid not null,
+	prs_lextype	oid not null,
+	prs_comment	text
+) with oids;
+
+--sql-level interface
+CREATE TYPE tokentype 
+	as (tokid int4, alias text, descr text); 
+
+CREATE FUNCTION token_type(int4)
+	returns setof tokentype
+	as '$libdir/tsearch2'
+	language 'C'
+	with (isstrict);
+
+CREATE FUNCTION token_type(text)
+	returns setof tokentype
+	as '$libdir/tsearch2', 'token_type_byname'
+	language 'C'
+	with (isstrict);
+
+CREATE FUNCTION token_type()
+	returns setof tokentype
+	as '$libdir/tsearch2', 'token_type_current'
+	language 'C'
+	with (isstrict);
+
+CREATE FUNCTION set_curprs(int)
+	returns void
+	as '$libdir/tsearch2'
+	language 'C'
+	with (isstrict);
+
+CREATE FUNCTION set_curprs(text)
+	returns void
+	as '$libdir/tsearch2', 'set_curprs_byname'
+	language 'C'
+	with (isstrict);
+
+CREATE TYPE tokenout 
+	as (tokid int4, token text);
+
+CREATE FUNCTION parse(oid,text)
+	returns setof tokenout
+	as '$libdir/tsearch2'
+	language 'C'
+	with (isstrict);
+ 
+CREATE FUNCTION parse(text,text)
+	returns setof tokenout
+	as '$libdir/tsearch2', 'parse_byname'
+	language 'C'
+	with (isstrict);
+ 
+CREATE FUNCTION parse(text)
+	returns setof tokenout
+	as '$libdir/tsearch2', 'parse_current'
+	language 'C'
+	with (isstrict);
+ 
+--default parser
+CREATE FUNCTION prsd_start(internal,int4)
+	returns internal
+	as '$libdir/tsearch2'
+	language 'C';
+
+CREATE FUNCTION prsd_getlexeme(internal,internal,internal)
+	returns int4
+	as '$libdir/tsearch2'
+	language 'C';
+
+CREATE FUNCTION prsd_end(internal)
+	returns void
+	as '$libdir/tsearch2'
+	language 'C';
+
+CREATE FUNCTION prsd_lextype(internal)
+	returns internal
+	as '$libdir/tsearch2'
+	language 'C';
+
+CREATE FUNCTION prsd_headline(internal,internal,internal)
+	returns internal
+	as '$libdir/tsearch2'
+	language 'C';
+
+insert into pg_ts_parser select
+	'default',
+	(select oid from pg_proc where proname='prsd_start'),	
+	(select oid from pg_proc where proname='prsd_getlexeme'),	
+	(select oid from pg_proc where proname='prsd_end'),	
+	(select oid from pg_proc where proname='prsd_headline'),
+	(select oid from pg_proc where proname='prsd_lextype'),
+	'Parser from OpenFTS v0.34'
+;	
+
+--tsearch config
+
+CREATE TABLE pg_ts_cfg (
+	ts_name		text not null primary key,
+	prs_name	text not null,
+	locale		text
+) with oids;
+
+CREATE TABLE pg_ts_cfgmap (
+	ts_name		text not null,
+	tok_alias	text not null,
+	dict_name	text[],
+	primary key (ts_name,tok_alias)
+) with oids;
+
+CREATE FUNCTION set_curcfg(int)
+	returns void
+	as '$libdir/tsearch2'
+	language 'C'
+	with (isstrict);
+
+CREATE FUNCTION set_curcfg(text)
+	returns void
+	as '$libdir/tsearch2', 'set_curcfg_byname'
+	language 'C'
+	with (isstrict);
+
+CREATE FUNCTION show_curcfg()
+	returns oid
+	as '$libdir/tsearch2'
+	language 'C'
+	with (isstrict);
+
+insert into pg_ts_cfg values ('default', 'default','C');
+insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R');
+insert into pg_ts_cfg values ('simple', 'default');
+
+insert into pg_ts_cfgmap values ('default', 'lword', '{en_stem}');
+insert into pg_ts_cfgmap values ('default', 'nlword', '{simple}');
+insert into pg_ts_cfgmap values ('default', 'word', '{simple}');
+insert into pg_ts_cfgmap values ('default', 'email', '{simple}');
+insert into pg_ts_cfgmap values ('default', 'url', '{simple}');
+insert into pg_ts_cfgmap values ('default', 'host', '{simple}');
+insert into pg_ts_cfgmap values ('default', 'sfloat', '{simple}');
+insert into pg_ts_cfgmap values ('default', 'version', '{simple}');
+insert into pg_ts_cfgmap values ('default', 'part_hword', '{simple}');
+insert into pg_ts_cfgmap values ('default', 'nlpart_hword', '{simple}');
+insert into pg_ts_cfgmap values ('default', 'lpart_hword', '{en_stem}');
+insert into pg_ts_cfgmap values ('default', 'hword', '{simple}');
+insert into pg_ts_cfgmap values ('default', 'lhword', '{en_stem}');
+insert into pg_ts_cfgmap values ('default', 'nlhword', '{simple}');
+insert into pg_ts_cfgmap values ('default', 'uri', '{simple}');
+insert into pg_ts_cfgmap values ('default', 'file', '{simple}');
+insert into pg_ts_cfgmap values ('default', 'float', '{simple}');
+insert into pg_ts_cfgmap values ('default', 'int', '{simple}');
+insert into pg_ts_cfgmap values ('default', 'uint', '{simple}');
+insert into pg_ts_cfgmap values ('default_russian', 'lword', '{en_stem}');
+insert into pg_ts_cfgmap values ('default_russian', 'nlword', '{ru_stem}');
+insert into pg_ts_cfgmap values ('default_russian', 'word', '{ru_stem}');
+insert into pg_ts_cfgmap values ('default_russian', 'email', '{simple}');
+insert into pg_ts_cfgmap values ('default_russian', 'url', '{simple}');
+insert into pg_ts_cfgmap values ('default_russian', 'host', '{simple}');
+insert into pg_ts_cfgmap values ('default_russian', 'sfloat', '{simple}');
+insert into pg_ts_cfgmap values ('default_russian', 'version', '{simple}');
+insert into pg_ts_cfgmap values ('default_russian', 'part_hword', '{simple}');
+insert into pg_ts_cfgmap values ('default_russian', 'nlpart_hword', '{ru_stem}');
+insert into pg_ts_cfgmap values ('default_russian', 'lpart_hword', '{en_stem}');
+insert into pg_ts_cfgmap values ('default_russian', 'hword', '{ru_stem}');
+insert into pg_ts_cfgmap values ('default_russian', 'lhword', '{en_stem}');
+insert into pg_ts_cfgmap values ('default_russian', 'nlhword', '{ru_stem}');
+insert into pg_ts_cfgmap values ('default_russian', 'uri', '{simple}');
+insert into pg_ts_cfgmap values ('default_russian', 'file', '{simple}');
+insert into pg_ts_cfgmap values ('default_russian', 'float', '{simple}');
+insert into pg_ts_cfgmap values ('default_russian', 'int', '{simple}');
+insert into pg_ts_cfgmap values ('default_russian', 'uint', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'lword', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'nlword', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'word', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'email', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'url', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'host', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'sfloat', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'version', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'part_hword', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'nlpart_hword', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'lpart_hword', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'hword', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'lhword', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'nlhword', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'uri', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'file', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'float', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'int', '{simple}');
+insert into pg_ts_cfgmap values ('simple', 'uint', '{simple}');
+
+--tsvector type
+CREATE FUNCTION tsvector_in(cstring)
+RETURNS tsvector
+AS '$libdir/tsearch2'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsvector_out(tsvector)
+RETURNS cstring
+AS '$libdir/tsearch2'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsvector (
+        INTERNALLENGTH = -1,
+        INPUT = tsvector_in,
+        OUTPUT = tsvector_out,
+        STORAGE = extended
+);
+
+CREATE FUNCTION length(tsvector)
+RETURNS int4
+AS '$libdir/tsearch2', 'tsvector_length'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(oid, text)
+RETURNS tsvector
+AS '$libdir/tsearch2'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text, text)
+RETURNS tsvector
+AS '$libdir/tsearch2', 'to_tsvector_name'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsvector(text)
+RETURNS tsvector
+AS '$libdir/tsearch2', 'to_tsvector_current'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION strip(tsvector)
+RETURNS tsvector
+AS '$libdir/tsearch2'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION setweight(tsvector,"char")
+RETURNS tsvector
+AS '$libdir/tsearch2'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE FUNCTION concat(tsvector,tsvector)
+RETURNS tsvector
+AS '$libdir/tsearch2'
+LANGUAGE 'C' with (isstrict,iscachable);
+
+CREATE OPERATOR || (
+        LEFTARG = tsvector,
+        RIGHTARG = tsvector,
+        PROCEDURE = concat
+);
+
+--query type
+CREATE FUNCTION tsquery_in(cstring)
+RETURNS tsquery
+AS '$libdir/tsearch2'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION tsquery_out(tsquery)
+RETURNS cstring
+AS '$libdir/tsearch2'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE tsquery (
+        INTERNALLENGTH = -1,
+        INPUT = tsquery_in,
+        OUTPUT = tsquery_out
+);
+
+CREATE FUNCTION querytree(tsquery)
+RETURNS text
+AS '$libdir/tsearch2', 'tsquerytree'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION to_tsquery(oid, text)
+RETURNS tsquery
+AS '$libdir/tsearch2'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text, text)
+RETURNS tsquery
+AS '$libdir/tsearch2','to_tsquery_name'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+CREATE FUNCTION to_tsquery(text)
+RETURNS tsquery
+AS '$libdir/tsearch2','to_tsquery_current'
+LANGUAGE 'c' with (isstrict,iscachable);
+
+--operations
+CREATE FUNCTION exectsq(tsvector, tsquery)
+RETURNS bool
+AS '$libdir/tsearch2'
+LANGUAGE 'C' with (isstrict, iscachable);
+  
+COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index';
+
+CREATE FUNCTION rexectsq(tsquery, tsvector)
+RETURNS bool
+AS '$libdir/tsearch2'
+LANGUAGE 'C' with (isstrict, iscachable);
+
+COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index';
+
+CREATE OPERATOR @@ (
+        LEFTARG = tsvector,
+        RIGHTARG = tsquery,
+        PROCEDURE = exectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+CREATE OPERATOR @@ (
+        LEFTARG = tsquery,
+        RIGHTARG = tsvector,
+        PROCEDURE = rexectsq,
+        COMMUTATOR = '@@',
+        RESTRICT = contsel,
+        JOIN = contjoinsel
+);
+
+--Trigger
+CREATE FUNCTION tsearch2()
+RETURNS trigger
+AS '$libdir/tsearch2'
+LANGUAGE 'C';
+
+--Relevation
+CREATE FUNCTION rank(float4[], tsvector, tsquery)
+RETURNS float4
+AS '$libdir/tsearch2'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(float4[], tsvector, tsquery, int4)
+RETURNS float4
+AS '$libdir/tsearch2'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery)
+RETURNS float4
+AS '$libdir/tsearch2', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank(tsvector, tsquery, int4)
+RETURNS float4
+AS '$libdir/tsearch2', 'rank_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery)
+RETURNS float4
+AS '$libdir/tsearch2'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4)
+RETURNS float4
+AS '$libdir/tsearch2'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery)
+RETURNS float4
+AS '$libdir/tsearch2', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION rank_cd(tsvector, tsquery, int4)
+RETURNS float4
+AS '$libdir/tsearch2', 'rank_cd_def'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery, text)
+RETURNS text
+AS '$libdir/tsearch2', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(oid, text, tsquery)
+RETURNS text
+AS '$libdir/tsearch2', 'headline'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery, text)
+RETURNS text
+AS '$libdir/tsearch2', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, text, tsquery)
+RETURNS text
+AS '$libdir/tsearch2', 'headline_byname'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery, text)
+RETURNS text
+AS '$libdir/tsearch2', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+CREATE FUNCTION headline(text, tsquery)
+RETURNS text
+AS '$libdir/tsearch2', 'headline_current'
+LANGUAGE 'C' WITH (isstrict, iscachable);
+
+--GiST
+--GiST key type 
+CREATE FUNCTION gtsvector_in(cstring)
+RETURNS gtsvector
+AS '$libdir/tsearch2'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_out(gtsvector)
+RETURNS cstring
+AS '$libdir/tsearch2'
+LANGUAGE 'C' with (isstrict);
+
+CREATE TYPE gtsvector (
+        INTERNALLENGTH = -1,
+        INPUT = gtsvector_in,
+        OUTPUT = gtsvector_out
+);
+
+-- support FUNCTIONs
+CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4)
+RETURNS bool
+AS '$libdir/tsearch2'
+LANGUAGE 'C';
+  
+CREATE FUNCTION gtsvector_compress(internal)
+RETURNS internal
+AS '$libdir/tsearch2'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_decompress(internal)
+RETURNS internal
+AS '$libdir/tsearch2'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_penalty(internal,internal,internal)
+RETURNS internal
+AS '$libdir/tsearch2'
+LANGUAGE 'C' with (isstrict);
+
+CREATE FUNCTION gtsvector_picksplit(internal, internal)
+RETURNS internal
+AS '$libdir/tsearch2'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_union(bytea, internal)
+RETURNS _int4
+AS '$libdir/tsearch2'
+LANGUAGE 'C';
+
+CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal)
+RETURNS internal
+AS '$libdir/tsearch2'
+LANGUAGE 'C';
+
+-- CREATE the OPERATOR class
+CREATE OPERATOR CLASS gist_tsvector_ops
+DEFAULT FOR TYPE tsvector USING gist
+AS
+        OPERATOR        1       @@ (tsvector, tsquery)  RECHECK ,
+        FUNCTION        1       gtsvector_consistent (gtsvector, internal, int4),
+        FUNCTION        2       gtsvector_union (bytea, internal),
+        FUNCTION        3       gtsvector_compress (internal),
+        FUNCTION        4       gtsvector_decompress (internal),
+        FUNCTION        5       gtsvector_penalty (internal, internal, internal),
+        FUNCTION        6       gtsvector_picksplit (internal, internal),
+        FUNCTION        7       gtsvector_same (gtsvector, gtsvector, internal),
+        STORAGE         gtsvector;
+
+
+--stat info
+CREATE TYPE statinfo 
+	as (word text, ndoc int4, nentry int4);
+
+--CREATE FUNCTION tsstat_in(cstring)
+--RETURNS tsstat
+--AS '$libdir/tsearch2'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION tsstat_out(tsstat)
+--RETURNS cstring
+--AS '$libdir/tsearch2'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE TYPE tsstat (
+--        INTERNALLENGTH = -1,
+--        INPUT = tsstat_in,
+--        OUTPUT = tsstat_out,
+--        STORAGE = plain
+--);
+--
+--CREATE FUNCTION ts_accum(tsstat,tsvector)
+--RETURNS tsstat
+--AS '$libdir/tsearch2'
+--LANGUAGE 'C' with (isstrict);
+--
+--CREATE FUNCTION ts_accum_finish(tsstat)
+--	returns setof statinfo
+--	as '$libdir/tsearch2'
+--	language 'C'
+--	with (isstrict);
+--
+--CREATE AGGREGATE stat (
+--	BASETYPE=tsvector,
+--	SFUNC=ts_accum,
+--	STYPE=tsstat,
+--	FINALFUNC = ts_accum_finish,
+--	initcond = ''
+--); 
+
+CREATE FUNCTION stat(text)
+	returns setof statinfo
+	as '$libdir/tsearch2', 'ts_stat'
+	language 'C'
+	with (isstrict);
+
+--reset - just for debuging
+CREATE FUNCTION reset_tsearch()
+        returns void
+        as '$libdir/tsearch2'
+        language 'C'
+        with (isstrict);
+
+--get cover (debug for rank_cd)
+CREATE FUNCTION get_covers(tsvector,tsquery)
+        returns text
+        as '$libdir/tsearch2'
+        language 'C'
+        with (isstrict);
+
+--debug function
+create type tsdebug as (
+        ts_name text,
+        tok_type text,
+        description text,
+        token   text,
+        dict_name text[],
+        "tsvector" tsvector
+);
+
+create function _get_parser_from_curcfg() 
+returns text as 
+' select prs_name from pg_ts_cfg where oid = show_curcfg() '
+language 'SQL' with(isstrict,iscachable);
+
+create function ts_debug(text)
+returns setof tsdebug as '
+select 
+        m.ts_name,
+        t.alias as tok_type,
+        t.descr as description,
+        p.token,
+        m.dict_name,
+        strip(to_tsvector(p.token)) as tsvector
+from
+        parse( _get_parser_from_curcfg(), $1 ) as p,
+        token_type() as t,
+        pg_ts_cfgmap as m,
+        pg_ts_cfg as c
+where
+        t.tokid=p.tokid and
+        t.alias = m.tok_alias and 
+        m.ts_name=c.ts_name and 
+        c.oid=show_curcfg() 
+' language 'SQL' with(isstrict);
+"""
+
+def setup(cursor):
+    """Execute the statements of 'tsearch_sql' on 'cursor', one at a time."""
+    # Drop the lines that start with an SQL comment marker.
+    kept_lines = []
+    for line in tsearch_sql.split('\n'):
+        if line.startswith('--'):
+            continue
+        kept_lines.append(line)
+
+    # Statements are separated by ';'; skip the whitespace-only pieces.
+    for statement in '\n'.join(kept_lines).split(';'):
+        if not statement.strip():
+            continue
+        cursor.execute(statement)
--- a/test/db_test_base.py	Wed Dec 15 00:00:52 2004 +0000
+++ b/test/db_test_base.py	Thu Dec 16 22:22:55 2004 +0000
@@ -15,7 +15,7 @@
 # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
 # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 #
-# $Id: db_test_base.py,v 1.55 2004-11-26 00:01:04 richard Exp $
+# $Id: db_test_base.py,v 1.56 2004-12-16 22:22:55 jlgijsbers Exp $
 
 import unittest, os, shutil, errno, imp, sys, time, pprint
 
@@ -78,6 +78,8 @@
         priority=Link('priority'))
     stuff = module.Class(db, "stuff", stuff=String())
     session = module.Class(db, 'session', title=String())
+    msg = module.FileClass(db, "msg",
+                           author=Link("user", do_journal='no'))
     session.disableJournalling()
     db.post_init()
     if create:
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/test_tsearch2.py	Thu Dec 16 22:22:55 2004 +0000
@@ -0,0 +1,124 @@
+#
+# Copyright (c) 2001 Bizar Software Pty Ltd (http://www.bizarsoftware.com.au/)
+# This module is free software, and you may redistribute it and/or modify
+# under the same terms as Python, so long as this copyright message and
+# disclaimer are retained in their original form.
+#
+# IN NO EVENT SHALL BIZAR SOFTWARE PTY LTD BE LIABLE TO ANY PARTY FOR
+# DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING
+# OUT OF THE USE OF THIS CODE, EVEN IF THE AUTHOR HAS BEEN ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+# BIZAR SOFTWARE PTY LTD SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE.  THE CODE PROVIDED HEREUNDER IS ON AN "AS IS"
+# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
+# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+#
+# $Id: test_tsearch2.py,v 1.1 2004-12-16 22:22:55 jlgijsbers Exp $
+
+import unittest
+
+from roundup.hyperdb import DatabaseError
+
+from db_test_base import DBTest, ROTest, config, SchemaTest, ClassicInitTest
+
+from roundup.backends import get_backend, have_backend
+
+class tsearch2Opener:
+    if have_backend('tsearch2'):
+        module = get_backend('tsearch2')
+
+    def setUp(self):
+        pass
+
+    def tearDown(self):
+        self.nuke_database()
+
+    def nuke_database(self):
+        # clear out the database - easiest way is to nuke and re-create it
+        self.module.db_nuke(config)
+
+class tsearch2DBTest(tsearch2Opener, DBTest):
+    def setUp(self):
+        tsearch2Opener.setUp(self)
+        DBTest.setUp(self)
+
+    def tearDown(self):
+        DBTest.tearDown(self)
+        tsearch2Opener.tearDown(self)
+
+    def testFilteringIntervalSort(self):
+        # PostgreSQL, which the tsearch2 backend is built on, sorts NULLs
+        # differently from other databases (others sort NULLs below real
+        # values, PG sorts them above)
+        ae, filt = self.filteringSetup()
+        # ascending should sort 1:10, 1d, None (PG puts NULLs last)
+        ae(filt(None, {}, ('+','foo'), (None,None)), ['4', '1', '2', '3'])
+        # descending should sort None, 1d, 1:10 (PG puts NULLs first)
+        ae(filt(None, {}, ('-','foo'), (None,None)), ['3', '2', '1', '4'])
+
+    def testTransactions(self):
+        # XXX: in its current form, this test doesn't make sense for tsearch2.
+        # It tests the transactions mechanism by counting the number of files
+        # in the FileStorage. As tsearch2 doesn't use the FileStorage, this
+        # fails. The test should probably be rewritten with some other way of
+        # checking rollbacks/commits.
+        pass
+
+class tsearch2ROTest(tsearch2Opener, ROTest):
+    def setUp(self):
+        tsearch2Opener.setUp(self)
+        ROTest.setUp(self)
+
+    def tearDown(self):
+        ROTest.tearDown(self)
+        tsearch2Opener.tearDown(self)
+
+class tsearch2SchemaTest(tsearch2Opener, SchemaTest):
+    def setUp(self):
+        tsearch2Opener.setUp(self)
+        SchemaTest.setUp(self)
+
+    def tearDown(self):
+        SchemaTest.tearDown(self)
+        tsearch2Opener.tearDown(self)
+
+class tsearch2ClassicInitTest(tsearch2Opener, ClassicInitTest):
+    backend = 'tsearch2'
+    def setUp(self):
+        tsearch2Opener.setUp(self)
+        ClassicInitTest.setUp(self)
+
+    def tearDown(self):
+        ClassicInitTest.tearDown(self)
+        tsearch2Opener.tearDown(self)
+
+from session_common import RDBMSTest
+class tsearch2SessionTest(tsearch2Opener, RDBMSTest):
+    def setUp(self):
+        tsearch2Opener.setUp(self)
+        RDBMSTest.setUp(self)
+    def tearDown(self):
+        RDBMSTest.tearDown(self)
+        tsearch2Opener.tearDown(self)
+
+def test_suite():
+    suite = unittest.TestSuite()
+    if not have_backend('tsearch2'):
+        print "Skipping tsearch2 tests"
+        return suite
+
+    # make sure we start with a clean slate
+    if tsearch2Opener.module.db_exists(config):
+        tsearch2Opener.module.db_nuke(config, 1)
+
+    # TODO: Check if we can run postgresql tests
+    print 'Including tsearch2 tests'
+    suite.addTest(unittest.makeSuite(tsearch2DBTest))
+    suite.addTest(unittest.makeSuite(tsearch2ROTest))
+    suite.addTest(unittest.makeSuite(tsearch2SchemaTest))
+    suite.addTest(unittest.makeSuite(tsearch2ClassicInitTest))
+    suite.addTest(unittest.makeSuite(tsearch2SessionTest))
+    return suite
+
+# vim: set et sts=4 sw=4 :

Roundup Issue Tracker: http://roundup-tracker.org/