Mercurial > p > roundup > code
changeset 3016:224c7c0b9708
First checkin of tsearch2 "backend". Miscellaneous notes:
* We override the testTransactions method, as it relies on FileStorage for its
transaction testing.
* importing/exporting doesn't work right yet.
* Filtering of text/plain mime-types is an ugly hack right now.
| author | Johannes Gijsbers <jlgijsbers@users.sourceforge.net> |
|---|---|
| date | Thu, 16 Dec 2004 22:22:55 +0000 |
| parents | 6dbe3798a4c4 |
| children | f1cba8342186 |
| files | roundup/backends/back_tsearch2.py roundup/backends/rdbms_common.py roundup/backends/tsearch2_setup.py test/db_test_base.py test/test_tsearch2.py |
| diffstat | 5 files changed, 1063 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
# roundup/backends/back_tsearch2.py
"""PostgreSQL backend variant that uses tsearch2 for full-text search.

Every class table gets an extra ``idxFTI`` tsvector column.  A tsearch2
trigger keeps that column up to date from the class's indexed String
properties, so the Python-side ``Indexer`` only has to run queries.
"""
import re

import psycopg

from roundup import hyperdb
from roundup.backends import back_postgresql, tsearch2_setup, indexer_rdbms
from roundup.backends.back_postgresql import db_create, db_nuke, db_command
from roundup.backends.back_postgresql import pg_command, db_exists, Class, IssueClass, FileClass

# XXX: Should probably be on the Class class.
def _indexedProps(spec):
    """Get a list of properties to be indexed on 'spec'."""
    return [prop for prop, propclass in spec.getprops().items()
            if isinstance(propclass, hyperdb.String) and propclass.indexme]

def _getQueryDict(spec):
    """Get a convenience dictionary for creating tsearch2 indexes.

    The dictionary carries 'classname', 'indexedColumns' (the indexed
    property names with the '_' column prefix), 'tablename' and
    'triggername', ready for use with %-formatting of SQL templates.
    """
    query_dict = {'classname': spec.classname,
                  'indexedColumns': ['_' + prop for prop in _indexedProps(spec)]}
    query_dict['tablename'] = "_%(classname)s" % query_dict
    query_dict['triggername'] = "%(tablename)s_tsvectorupdate" % query_dict
    return query_dict

def _isLink(propclass):
    """Return true if 'propclass' is a Link or Multilink property."""
    return (isinstance(propclass, hyperdb.Link) or
            isinstance(propclass, hyperdb.Multilink))

class Database(back_postgresql.Database):
    """PostgreSQL database whose class tables carry a tsearch2 index."""

    def __init__(self, config, journaltag=None):
        back_postgresql.Database.__init__(self, config, journaltag)
        # Replace whatever indexer the base class configured.
        self.indexer = Indexer(self)

    def create_version_2_tables(self):
        """Create the base tables, then install the tsearch2 objects."""
        back_postgresql.Database.create_version_2_tables(self)
        tsearch2_setup.setup(self.cursor)

    def create_class_table_indexes(self, spec):
        """Create the regular indexes plus the GiST index and trigger."""
        back_postgresql.Database.create_class_table_indexes(self, spec)
        self.cursor.execute("""CREATE INDEX _%(classname)s_idxFTI_idx
                            ON %(tablename)s USING gist(idxFTI);""" %
                            _getQueryDict(spec))

        self.create_tsearch2_trigger(spec)

    def create_tsearch2_trigger(self, spec):
        """(Re)index existing rows of 'spec' and install the update trigger.

        Does nothing when the class has no indexed String properties.
        """
        d = _getQueryDict(spec)
        if not d['indexedColumns']:
            return

        # In SQL, "x || NULL" is NULL, so a single unset column would blank
        # the whole vector for that row; coalesce each column to ''.
        d['joined'] = " || ' ' || ".join(
            ["coalesce(%s, '')" % column for column in d['indexedColumns']])
        query = """UPDATE %(tablename)s
                   SET idxFTI = to_tsvector('default', %(joined)s)""" % d
        self.cursor.execute(query)

        d['joined'] = ", ".join(d['indexedColumns'])
        query = """CREATE TRIGGER %(triggername)s
                   BEFORE UPDATE OR INSERT ON %(tablename)s
                   FOR EACH ROW EXECUTE PROCEDURE
                   tsearch2(idxFTI, %(joined)s);""" % d
        self.cursor.execute(query)

    def drop_tsearch2_trigger(self, spec):
        """Drop the tsearch2 trigger of 'spec' if it exists."""
        # Check whether the trigger exists before trying to drop it.
        query_dict = _getQueryDict(spec)
        self.sql("""SELECT tgname FROM pg_catalog.pg_trigger
                    WHERE tgname = '%(triggername)s'""" % query_dict)
        if self.cursor.fetchall():
            self.sql("""DROP TRIGGER %(triggername)s ON %(tablename)s""" %
                     query_dict)

    def update_class(self, spec, old_spec, force=0):
        """Update the class table, then rebuild its tsearch2 trigger."""
        result = back_postgresql.Database.update_class(self, spec, old_spec, force)

        # Drop trigger...
        self.drop_tsearch2_trigger(spec)

        # and recreate if necessary.
        self.create_tsearch2_trigger(spec)

        return result

    def determine_all_columns(self, spec):
        """Return the base columns plus the 'idxFTI' tsvector column."""
        cols, mls = back_postgresql.Database.determine_all_columns(self, spec)
        cols.append(('idxFTI', 'tsvector'))
        return cols, mls

class Indexer:
    """Indexer that answers searches from the idxFTI columns.

    The maintenance methods are no-ops in this class; only the query
    methods (search, find, tsearchQuery) do any work.
    """

    def __init__(self, db):
        self.db = db

    def force_reindex(self):
        pass

    def should_reindex(self):
        pass

    def save_index(self):
        pass

    def add_text(self, identifier, text, mime_type=None):
        pass

    def close(self):
        pass

    def search(self, search_terms, klass, ignore={},
               dre=re.compile(r'([^\d]+)(\d+)')):
        '''Display search results looking for [search, terms] associated
        with the hyperdb Class "klass". Ignore hits on {class: property}.

        Returns a dictionary keyed by the ids of matching "klass" nodes.
        Each value maps a link property name to the list of linked node
        ids (for example messages or files) that produced the match; the
        value is empty when the node itself matched.

        "ignore" and "dre" are accepted for interface compatibility and
        are not used by this implementation.
        '''
        # do the index lookup
        hits = self.find(search_terms, klass)
        if not hits:
            return {}

        propdefs = klass.getprops()
        designator_propname = {}
        for nm, propclass in propdefs.items():
            if _isLink(propclass):
                designator_propname[propclass.classname] = nm

        # build a dictionary of nodes and their associated messages
        # and files
        nodeids = {}      # this is the answer
        propspec = {}     # used to do the klass.find
        for propname in designator_propname.values():
            propspec[propname] = {}   # used as a set (value doesn't matter)

        for classname, nodeid in hits:
            # if it's a property on klass, it's easy
            if classname == klass.classname:
                nodeids.setdefault(nodeid, {})
                continue

            # make sure the class is a linked one, otherwise ignore
            if classname not in designator_propname:
                continue

            # it's a linked class - set up to do the klass.find
            linkprop = designator_propname[classname]   # eg, msg -> messages
            propspec[linkprop][nodeid] = 1

        # retain only the meaningful entries (build a new dict rather than
        # deleting from the one being iterated)
        propspec = dict([(propname, idset)
                         for propname, idset in propspec.items() if idset])
        if not propspec:
            # no hits on linked classes: nothing to resolve through find()
            return nodeids

        # klass.find tells me the klass nodeids the linked nodes relate to
        for resid in klass.find(**propspec):
            resid = str(resid)
            # keep an entry that a direct hit may already have created
            node_dict = nodeids.setdefault(resid, {})
            # now figure out where it came from
            for linkprop in propspec:
                linked = klass.get(resid, linkprop)
                # a Link holds a single id (or None), a Multilink a list
                if isinstance(propdefs[linkprop], hyperdb.Link):
                    if linked is None:
                        continue
                    linked = [linked]
                for nodeid in linked:
                    if nodeid in propspec[linkprop]:
                        # OK, this node[propname] has a winner
                        node_dict.setdefault(linkprop, []).append(nodeid)
        return nodeids

    def find(self, search_terms, klass):
        """Return (classname, nodeid) hits for "klass" and its linked classes.

        Returns None when no search terms are given.
        """
        if not search_terms:
            return None

        nodeids = self.tsearchQuery(klass.classname, search_terms)

        # query every linked class once, even when several properties
        # link to the same class (or back to klass itself)
        queried = {klass.classname: 1}
        for nm, propclass in klass.getprops().items():
            if _isLink(propclass) and propclass.classname not in queried:
                queried[propclass.classname] = 1
                nodeids.extend(self.tsearchQuery(propclass.classname,
                                                 search_terms))

        return nodeids

    def tsearchQuery(self, classname, search_terms):
        """Return (classname, nodeid) pairs of rows matching all terms."""
        # The table name comes from the schema; the search terms come from
        # the user, so they are passed as a bound parameter.
        query = """SELECT id FROM _%s
                   WHERE idxFTI @@ to_tsquery('default', %%s)""" % classname
        self.db.cursor.execute(query, (' & '.join(search_terms),))
        klass = self.db.getclass(classname)
        nodeids = [str(row[0]) for row in self.db.cursor.fetchall()]

        # filter out files without text/plain mime type
        # XXX: files without text/plain shouldn't be indexed at all, we
        # should take care of this in the trigger
        if 'type' in klass.getprops():
            nodeids = [nodeid for nodeid in nodeids
                       if klass.get(nodeid, 'type') == 'text/plain']

        return [(classname, nodeid) for nodeid in nodeids]

# XXX: we can't reuse hyperdb.FileClass for importing/exporting, so file
# contents will end up in CSV exports for now. Not sure whether this is
# truly a problem. If it is, we should write an importer/exporter that
# converts from the database to the filesystem and vice versa
class FileClass(Class):
    def __init__(self, db, classname, **properties):
        '''The newly-created class automatically includes the "content"
        property, which is stored in the database and indexed.
        '''
        properties['content'] = hyperdb.String(indexme='yes')
        Class.__init__(self, db, classname, **properties)

    default_mime_type = 'text/plain'
    def create(self, **propvalues):
        '''Create a file node, defaulting "type" to default_mime_type.'''
        # figure the mime type
        if not propvalues.get('type'):
            propvalues['type'] = self.default_mime_type
        return Class.create(self, **propvalues)
--- a/roundup/backends/rdbms_common.py Wed Dec 15 00:00:52 2004 +0000 +++ b/roundup/backends/rdbms_common.py Thu Dec 16 22:22:55 2004 +0000 @@ -1,4 +1,4 @@ -# $Id: rdbms_common.py,v 1.142 2004-12-03 22:11:06 richard Exp $ +# $Id: rdbms_common.py,v 1.143 2004-12-16 22:22:55 jlgijsbers Exp $ ''' Relational database (SQL) backend common code. Basics: @@ -454,14 +454,21 @@ return 1 + def determine_all_columns(self, spec): + """Figure out the columns from the spec and also add internal columns + + """ + cols, mls = self.determine_columns(spec.properties.items()) + + # add on our special columns + cols.append(('id', 'INTEGER PRIMARY KEY')) + cols.append(('__retired__', 'INTEGER DEFAULT 0')) + return cols, mls + def create_class_table(self, spec): '''Create the class table for the given Class "spec". Creates the indexes too.''' - cols, mls = self.determine_columns(spec.properties.items()) - - # add on our special columns - cols.append(('id', 'INTEGER PRIMARY KEY')) - cols.append(('__retired__', 'INTEGER DEFAULT 0')) + cols, mls = self.determine_all_columns(spec) # create the base table scols = ','.join(['%s %s'%x for x in cols])
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roundup/backends/tsearch2_setup.py Thu Dec 16 22:22:55 2004 +0000 @@ -0,0 +1,708 @@ +tsearch_sql = """ +-- Adjust this setting to control where the objects get CREATEd. +SET search_path = public; + +--dict conf +CREATE TABLE pg_ts_dict ( + dict_name text not null primary key, + dict_init oid, + dict_initoption text, + dict_lexize oid not null, + dict_comment text +) with oids; + +--dict interface +CREATE FUNCTION lexize(oid, text) + returns _text + as '$libdir/tsearch2' + language 'C' + with (isstrict); + +CREATE FUNCTION lexize(text, text) + returns _text + as '$libdir/tsearch2', 'lexize_byname' + language 'C' + with (isstrict); + +CREATE FUNCTION lexize(text) + returns _text + as '$libdir/tsearch2', 'lexize_bycurrent' + language 'C' + with (isstrict); + +CREATE FUNCTION set_curdict(int) + returns void + as '$libdir/tsearch2' + language 'C' + with (isstrict); + +CREATE FUNCTION set_curdict(text) + returns void + as '$libdir/tsearch2', 'set_curdict_byname' + language 'C' + with (isstrict); + +--built-in dictionaries +CREATE FUNCTION dex_init(text) + returns internal + as '$libdir/tsearch2' + language 'C'; + +CREATE FUNCTION dex_lexize(internal,internal,int4) + returns internal + as '$libdir/tsearch2' + language 'C' + with (isstrict); + +insert into pg_ts_dict select + 'simple', + (select oid from pg_proc where proname='dex_init'), + null, + (select oid from pg_proc where proname='dex_lexize'), + 'Simple example of dictionary.' +; + +CREATE FUNCTION snb_en_init(text) + returns internal + as '$libdir/tsearch2' + language 'C'; + +CREATE FUNCTION snb_lexize(internal,internal,int4) + returns internal + as '$libdir/tsearch2' + language 'C' + with (isstrict); + +insert into pg_ts_dict select + 'en_stem', + (select oid from pg_proc where proname='snb_en_init'), + '/usr/share/postgresql/contrib/english.stop', + (select oid from pg_proc where proname='snb_lexize'), + 'English Stemmer. Snowball.' 
+; + +CREATE FUNCTION snb_ru_init(text) + returns internal + as '$libdir/tsearch2' + language 'C'; + +insert into pg_ts_dict select + 'ru_stem', + (select oid from pg_proc where proname='snb_ru_init'), + '/usr/share/postgresql/contrib/russian.stop', + (select oid from pg_proc where proname='snb_lexize'), + 'Russian Stemmer. Snowball.' +; + +CREATE FUNCTION spell_init(text) + returns internal + as '$libdir/tsearch2' + language 'C'; + +CREATE FUNCTION spell_lexize(internal,internal,int4) + returns internal + as '$libdir/tsearch2' + language 'C' + with (isstrict); + +insert into pg_ts_dict select + 'ispell_template', + (select oid from pg_proc where proname='spell_init'), + null, + (select oid from pg_proc where proname='spell_lexize'), + 'ISpell interface. Must have .dict and .aff files' +; + +CREATE FUNCTION syn_init(text) + returns internal + as '$libdir/tsearch2' + language 'C'; + +CREATE FUNCTION syn_lexize(internal,internal,int4) + returns internal + as '$libdir/tsearch2' + language 'C' + with (isstrict); + +insert into pg_ts_dict select + 'synonym', + (select oid from pg_proc where proname='syn_init'), + null, + (select oid from pg_proc where proname='syn_lexize'), + 'Example of synonym dictionary' +; + +--dict conf +CREATE TABLE pg_ts_parser ( + prs_name text not null primary key, + prs_start oid not null, + prs_nexttoken oid not null, + prs_end oid not null, + prs_headline oid not null, + prs_lextype oid not null, + prs_comment text +) with oids; + +--sql-level interface +CREATE TYPE tokentype + as (tokid int4, alias text, descr text); + +CREATE FUNCTION token_type(int4) + returns setof tokentype + as '$libdir/tsearch2' + language 'C' + with (isstrict); + +CREATE FUNCTION token_type(text) + returns setof tokentype + as '$libdir/tsearch2', 'token_type_byname' + language 'C' + with (isstrict); + +CREATE FUNCTION token_type() + returns setof tokentype + as '$libdir/tsearch2', 'token_type_current' + language 'C' + with (isstrict); + +CREATE FUNCTION 
set_curprs(int) + returns void + as '$libdir/tsearch2' + language 'C' + with (isstrict); + +CREATE FUNCTION set_curprs(text) + returns void + as '$libdir/tsearch2', 'set_curprs_byname' + language 'C' + with (isstrict); + +CREATE TYPE tokenout + as (tokid int4, token text); + +CREATE FUNCTION parse(oid,text) + returns setof tokenout + as '$libdir/tsearch2' + language 'C' + with (isstrict); + +CREATE FUNCTION parse(text,text) + returns setof tokenout + as '$libdir/tsearch2', 'parse_byname' + language 'C' + with (isstrict); + +CREATE FUNCTION parse(text) + returns setof tokenout + as '$libdir/tsearch2', 'parse_current' + language 'C' + with (isstrict); + +--default parser +CREATE FUNCTION prsd_start(internal,int4) + returns internal + as '$libdir/tsearch2' + language 'C'; + +CREATE FUNCTION prsd_getlexeme(internal,internal,internal) + returns int4 + as '$libdir/tsearch2' + language 'C'; + +CREATE FUNCTION prsd_end(internal) + returns void + as '$libdir/tsearch2' + language 'C'; + +CREATE FUNCTION prsd_lextype(internal) + returns internal + as '$libdir/tsearch2' + language 'C'; + +CREATE FUNCTION prsd_headline(internal,internal,internal) + returns internal + as '$libdir/tsearch2' + language 'C'; + +insert into pg_ts_parser select + 'default', + (select oid from pg_proc where proname='prsd_start'), + (select oid from pg_proc where proname='prsd_getlexeme'), + (select oid from pg_proc where proname='prsd_end'), + (select oid from pg_proc where proname='prsd_headline'), + (select oid from pg_proc where proname='prsd_lextype'), + 'Parser from OpenFTS v0.34' +; + +--tsearch config + +CREATE TABLE pg_ts_cfg ( + ts_name text not null primary key, + prs_name text not null, + locale text +) with oids; + +CREATE TABLE pg_ts_cfgmap ( + ts_name text not null, + tok_alias text not null, + dict_name text[], + primary key (ts_name,tok_alias) +) with oids; + +CREATE FUNCTION set_curcfg(int) + returns void + as '$libdir/tsearch2' + language 'C' + with (isstrict); + +CREATE FUNCTION 
set_curcfg(text) + returns void + as '$libdir/tsearch2', 'set_curcfg_byname' + language 'C' + with (isstrict); + +CREATE FUNCTION show_curcfg() + returns oid + as '$libdir/tsearch2' + language 'C' + with (isstrict); + +insert into pg_ts_cfg values ('default', 'default','C'); +insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R'); +insert into pg_ts_cfg values ('simple', 'default'); + +insert into pg_ts_cfgmap values ('default', 'lword', '{en_stem}'); +insert into pg_ts_cfgmap values ('default', 'nlword', '{simple}'); +insert into pg_ts_cfgmap values ('default', 'word', '{simple}'); +insert into pg_ts_cfgmap values ('default', 'email', '{simple}'); +insert into pg_ts_cfgmap values ('default', 'url', '{simple}'); +insert into pg_ts_cfgmap values ('default', 'host', '{simple}'); +insert into pg_ts_cfgmap values ('default', 'sfloat', '{simple}'); +insert into pg_ts_cfgmap values ('default', 'version', '{simple}'); +insert into pg_ts_cfgmap values ('default', 'part_hword', '{simple}'); +insert into pg_ts_cfgmap values ('default', 'nlpart_hword', '{simple}'); +insert into pg_ts_cfgmap values ('default', 'lpart_hword', '{en_stem}'); +insert into pg_ts_cfgmap values ('default', 'hword', '{simple}'); +insert into pg_ts_cfgmap values ('default', 'lhword', '{en_stem}'); +insert into pg_ts_cfgmap values ('default', 'nlhword', '{simple}'); +insert into pg_ts_cfgmap values ('default', 'uri', '{simple}'); +insert into pg_ts_cfgmap values ('default', 'file', '{simple}'); +insert into pg_ts_cfgmap values ('default', 'float', '{simple}'); +insert into pg_ts_cfgmap values ('default', 'int', '{simple}'); +insert into pg_ts_cfgmap values ('default', 'uint', '{simple}'); +insert into pg_ts_cfgmap values ('default_russian', 'lword', '{en_stem}'); +insert into pg_ts_cfgmap values ('default_russian', 'nlword', '{ru_stem}'); +insert into pg_ts_cfgmap values ('default_russian', 'word', '{ru_stem}'); +insert into pg_ts_cfgmap values ('default_russian', 'email', 
'{simple}'); +insert into pg_ts_cfgmap values ('default_russian', 'url', '{simple}'); +insert into pg_ts_cfgmap values ('default_russian', 'host', '{simple}'); +insert into pg_ts_cfgmap values ('default_russian', 'sfloat', '{simple}'); +insert into pg_ts_cfgmap values ('default_russian', 'version', '{simple}'); +insert into pg_ts_cfgmap values ('default_russian', 'part_hword', '{simple}'); +insert into pg_ts_cfgmap values ('default_russian', 'nlpart_hword', '{ru_stem}'); +insert into pg_ts_cfgmap values ('default_russian', 'lpart_hword', '{en_stem}'); +insert into pg_ts_cfgmap values ('default_russian', 'hword', '{ru_stem}'); +insert into pg_ts_cfgmap values ('default_russian', 'lhword', '{en_stem}'); +insert into pg_ts_cfgmap values ('default_russian', 'nlhword', '{ru_stem}'); +insert into pg_ts_cfgmap values ('default_russian', 'uri', '{simple}'); +insert into pg_ts_cfgmap values ('default_russian', 'file', '{simple}'); +insert into pg_ts_cfgmap values ('default_russian', 'float', '{simple}'); +insert into pg_ts_cfgmap values ('default_russian', 'int', '{simple}'); +insert into pg_ts_cfgmap values ('default_russian', 'uint', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'lword', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'nlword', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'word', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'email', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'url', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'host', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'sfloat', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'version', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'part_hword', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'nlpart_hword', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'lpart_hword', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'hword', '{simple}'); +insert into pg_ts_cfgmap 
values ('simple', 'lhword', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'nlhword', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'uri', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'file', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'float', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'int', '{simple}'); +insert into pg_ts_cfgmap values ('simple', 'uint', '{simple}'); + +--tsvector type +CREATE FUNCTION tsvector_in(cstring) +RETURNS tsvector +AS '$libdir/tsearch2' +LANGUAGE 'C' with (isstrict); + +CREATE FUNCTION tsvector_out(tsvector) +RETURNS cstring +AS '$libdir/tsearch2' +LANGUAGE 'C' with (isstrict); + +CREATE TYPE tsvector ( + INTERNALLENGTH = -1, + INPUT = tsvector_in, + OUTPUT = tsvector_out, + STORAGE = extended +); + +CREATE FUNCTION length(tsvector) +RETURNS int4 +AS '$libdir/tsearch2', 'tsvector_length' +LANGUAGE 'C' with (isstrict,iscachable); + +CREATE FUNCTION to_tsvector(oid, text) +RETURNS tsvector +AS '$libdir/tsearch2' +LANGUAGE 'C' with (isstrict,iscachable); + +CREATE FUNCTION to_tsvector(text, text) +RETURNS tsvector +AS '$libdir/tsearch2', 'to_tsvector_name' +LANGUAGE 'C' with (isstrict,iscachable); + +CREATE FUNCTION to_tsvector(text) +RETURNS tsvector +AS '$libdir/tsearch2', 'to_tsvector_current' +LANGUAGE 'C' with (isstrict,iscachable); + +CREATE FUNCTION strip(tsvector) +RETURNS tsvector +AS '$libdir/tsearch2' +LANGUAGE 'C' with (isstrict,iscachable); + +CREATE FUNCTION setweight(tsvector,"char") +RETURNS tsvector +AS '$libdir/tsearch2' +LANGUAGE 'C' with (isstrict,iscachable); + +CREATE FUNCTION concat(tsvector,tsvector) +RETURNS tsvector +AS '$libdir/tsearch2' +LANGUAGE 'C' with (isstrict,iscachable); + +CREATE OPERATOR || ( + LEFTARG = tsvector, + RIGHTARG = tsvector, + PROCEDURE = concat +); + +--query type +CREATE FUNCTION tsquery_in(cstring) +RETURNS tsquery +AS '$libdir/tsearch2' +LANGUAGE 'C' with (isstrict); + +CREATE FUNCTION tsquery_out(tsquery) +RETURNS cstring +AS 
'$libdir/tsearch2' +LANGUAGE 'C' with (isstrict); + +CREATE TYPE tsquery ( + INTERNALLENGTH = -1, + INPUT = tsquery_in, + OUTPUT = tsquery_out +); + +CREATE FUNCTION querytree(tsquery) +RETURNS text +AS '$libdir/tsearch2', 'tsquerytree' +LANGUAGE 'C' with (isstrict); + +CREATE FUNCTION to_tsquery(oid, text) +RETURNS tsquery +AS '$libdir/tsearch2' +LANGUAGE 'c' with (isstrict,iscachable); + +CREATE FUNCTION to_tsquery(text, text) +RETURNS tsquery +AS '$libdir/tsearch2','to_tsquery_name' +LANGUAGE 'c' with (isstrict,iscachable); + +CREATE FUNCTION to_tsquery(text) +RETURNS tsquery +AS '$libdir/tsearch2','to_tsquery_current' +LANGUAGE 'c' with (isstrict,iscachable); + +--operations +CREATE FUNCTION exectsq(tsvector, tsquery) +RETURNS bool +AS '$libdir/tsearch2' +LANGUAGE 'C' with (isstrict, iscachable); + +COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index'; + +CREATE FUNCTION rexectsq(tsquery, tsvector) +RETURNS bool +AS '$libdir/tsearch2' +LANGUAGE 'C' with (isstrict, iscachable); + +COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index'; + +CREATE OPERATOR @@ ( + LEFTARG = tsvector, + RIGHTARG = tsquery, + PROCEDURE = exectsq, + COMMUTATOR = '@@', + RESTRICT = contsel, + JOIN = contjoinsel +); +CREATE OPERATOR @@ ( + LEFTARG = tsquery, + RIGHTARG = tsvector, + PROCEDURE = rexectsq, + COMMUTATOR = '@@', + RESTRICT = contsel, + JOIN = contjoinsel +); + +--Trigger +CREATE FUNCTION tsearch2() +RETURNS trigger +AS '$libdir/tsearch2' +LANGUAGE 'C'; + +--Relevation +CREATE FUNCTION rank(float4[], tsvector, tsquery) +RETURNS float4 +AS '$libdir/tsearch2' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION rank(float4[], tsvector, tsquery, int4) +RETURNS float4 +AS '$libdir/tsearch2' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION rank(tsvector, tsquery) +RETURNS float4 +AS '$libdir/tsearch2', 'rank_def' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION rank(tsvector, 
tsquery, int4) +RETURNS float4 +AS '$libdir/tsearch2', 'rank_def' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION rank_cd(int4, tsvector, tsquery) +RETURNS float4 +AS '$libdir/tsearch2' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4) +RETURNS float4 +AS '$libdir/tsearch2' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION rank_cd(tsvector, tsquery) +RETURNS float4 +AS '$libdir/tsearch2', 'rank_cd_def' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION rank_cd(tsvector, tsquery, int4) +RETURNS float4 +AS '$libdir/tsearch2', 'rank_cd_def' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION headline(oid, text, tsquery, text) +RETURNS text +AS '$libdir/tsearch2', 'headline' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION headline(oid, text, tsquery) +RETURNS text +AS '$libdir/tsearch2', 'headline' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION headline(text, text, tsquery, text) +RETURNS text +AS '$libdir/tsearch2', 'headline_byname' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION headline(text, text, tsquery) +RETURNS text +AS '$libdir/tsearch2', 'headline_byname' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION headline(text, tsquery, text) +RETURNS text +AS '$libdir/tsearch2', 'headline_current' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION headline(text, tsquery) +RETURNS text +AS '$libdir/tsearch2', 'headline_current' +LANGUAGE 'C' WITH (isstrict, iscachable); + +--GiST +--GiST key type +CREATE FUNCTION gtsvector_in(cstring) +RETURNS gtsvector +AS '$libdir/tsearch2' +LANGUAGE 'C' with (isstrict); + +CREATE FUNCTION gtsvector_out(gtsvector) +RETURNS cstring +AS '$libdir/tsearch2' +LANGUAGE 'C' with (isstrict); + +CREATE TYPE gtsvector ( + INTERNALLENGTH = -1, + INPUT = gtsvector_in, + OUTPUT = gtsvector_out +); + +-- support FUNCTIONs +CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4) 
+RETURNS bool +AS '$libdir/tsearch2' +LANGUAGE 'C'; + +CREATE FUNCTION gtsvector_compress(internal) +RETURNS internal +AS '$libdir/tsearch2' +LANGUAGE 'C'; + +CREATE FUNCTION gtsvector_decompress(internal) +RETURNS internal +AS '$libdir/tsearch2' +LANGUAGE 'C'; + +CREATE FUNCTION gtsvector_penalty(internal,internal,internal) +RETURNS internal +AS '$libdir/tsearch2' +LANGUAGE 'C' with (isstrict); + +CREATE FUNCTION gtsvector_picksplit(internal, internal) +RETURNS internal +AS '$libdir/tsearch2' +LANGUAGE 'C'; + +CREATE FUNCTION gtsvector_union(bytea, internal) +RETURNS _int4 +AS '$libdir/tsearch2' +LANGUAGE 'C'; + +CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal) +RETURNS internal +AS '$libdir/tsearch2' +LANGUAGE 'C'; + +-- CREATE the OPERATOR class +CREATE OPERATOR CLASS gist_tsvector_ops +DEFAULT FOR TYPE tsvector USING gist +AS + OPERATOR 1 @@ (tsvector, tsquery) RECHECK , + FUNCTION 1 gtsvector_consistent (gtsvector, internal, int4), + FUNCTION 2 gtsvector_union (bytea, internal), + FUNCTION 3 gtsvector_compress (internal), + FUNCTION 4 gtsvector_decompress (internal), + FUNCTION 5 gtsvector_penalty (internal, internal, internal), + FUNCTION 6 gtsvector_picksplit (internal, internal), + FUNCTION 7 gtsvector_same (gtsvector, gtsvector, internal), + STORAGE gtsvector; + + +--stat info +CREATE TYPE statinfo + as (word text, ndoc int4, nentry int4); + +--CREATE FUNCTION tsstat_in(cstring) +--RETURNS tsstat +--AS '$libdir/tsearch2' +--LANGUAGE 'C' with (isstrict); +-- +--CREATE FUNCTION tsstat_out(tsstat) +--RETURNS cstring +--AS '$libdir/tsearch2' +--LANGUAGE 'C' with (isstrict); +-- +--CREATE TYPE tsstat ( +-- INTERNALLENGTH = -1, +-- INPUT = tsstat_in, +-- OUTPUT = tsstat_out, +-- STORAGE = plain +--); +-- +--CREATE FUNCTION ts_accum(tsstat,tsvector) +--RETURNS tsstat +--AS '$libdir/tsearch2' +--LANGUAGE 'C' with (isstrict); +-- +--CREATE FUNCTION ts_accum_finish(tsstat) +-- returns setof statinfo +-- as '$libdir/tsearch2' +-- language 'C' +-- with 
(isstrict); +-- +--CREATE AGGREGATE stat ( +-- BASETYPE=tsvector, +-- SFUNC=ts_accum, +-- STYPE=tsstat, +-- FINALFUNC = ts_accum_finish, +-- initcond = '' +--); + +CREATE FUNCTION stat(text) + returns setof statinfo + as '$libdir/tsearch2', 'ts_stat' + language 'C' + with (isstrict); + +--reset - just for debuging +CREATE FUNCTION reset_tsearch() + returns void + as '$libdir/tsearch2' + language 'C' + with (isstrict); + +--get cover (debug for rank_cd) +CREATE FUNCTION get_covers(tsvector,tsquery) + returns text + as '$libdir/tsearch2' + language 'C' + with (isstrict); + +--debug function +create type tsdebug as ( + ts_name text, + tok_type text, + description text, + token text, + dict_name text[], + "tsvector" tsvector +); + +create function _get_parser_from_curcfg() +returns text as +' select prs_name from pg_ts_cfg where oid = show_curcfg() ' +language 'SQL' with(isstrict,iscachable); + +create function ts_debug(text) +returns setof tsdebug as ' +select + m.ts_name, + t.alias as tok_type, + t.descr as description, + p.token, + m.dict_name, + strip(to_tsvector(p.token)) as tsvector +from + parse( _get_parser_from_curcfg(), $1 ) as p, + token_type() as t, + pg_ts_cfgmap as m, + pg_ts_cfg as c +where + t.tokid=p.tokid and + t.alias = m.tok_alias and + m.ts_name=c.ts_name and + c.oid=show_curcfg() +' language 'SQL' with(isstrict); +""" + +def setup(cursor): + sql = '\n'.join([line for line in tsearch_sql.split('\n') + if not line.startswith('--')]) + for query in sql.split(';'): + if query.strip(): + cursor.execute(query)
--- a/test/db_test_base.py Wed Dec 15 00:00:52 2004 +0000 +++ b/test/db_test_base.py Thu Dec 16 22:22:55 2004 +0000 @@ -15,7 +15,7 @@ # BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE, # SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. # -# $Id: db_test_base.py,v 1.55 2004-11-26 00:01:04 richard Exp $ +# $Id: db_test_base.py,v 1.56 2004-12-16 22:22:55 jlgijsbers Exp $ import unittest, os, shutil, errno, imp, sys, time, pprint @@ -78,6 +78,8 @@ priority=Link('priority')) stuff = module.Class(db, "stuff", stuff=String()) session = module.Class(db, 'session', title=String()) + msg = module.FileClass(db, "msg", + author=Link("user", do_journal='no')) session.disableJournalling() db.post_init() if create:
# test/test_tsearch2.py
#
# Copyright (c) 2001 Bizar Software Pty Ltd (http://www.bizarsoftware.com.au/)
# This module is free software, and you may redistribute it and/or modify
# under the same terms as Python, so long as this copyright message and
# disclaimer are retained in their original form.
#
# IN NO EVENT SHALL BIZAR SOFTWARE PTY LTD BE LIABLE TO ANY PARTY FOR
# DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING
# OUT OF THE USE OF THIS CODE, EVEN IF THE AUTHOR HAS BEEN ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# BIZAR SOFTWARE PTY LTD SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE.  THE CODE PROVIDED HEREUNDER IS ON AN "AS IS"
# BASIS, AND THERE IS NO OBLIGATION WHATSOEVER TO PROVIDE MAINTENANCE,
# SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
#
# $Id: test_tsearch2.py,v 1.1 2004-12-16 22:22:55 jlgijsbers Exp $

import unittest

from roundup.hyperdb import DatabaseError

from db_test_base import DBTest, ROTest, config, SchemaTest, ClassicInitTest

from roundup.backends import get_backend, have_backend

class tsearch2Opener:
    """Mixin that binds the shared test cases to the tsearch2 backend."""

    # Only resolve the backend module when it can actually be imported;
    # test_suite() returns early when it cannot.
    if have_backend('tsearch2'):
        module = get_backend('tsearch2')

    def setUp(self):
        pass

    def tearDown(self):
        self.nuke_database()

    def nuke_database(self):
        # clear out the database - easiest way is to nuke and re-create it
        self.module.db_nuke(config)

class tsearch2DBTest(tsearch2Opener, DBTest):
    def setUp(self):
        tsearch2Opener.setUp(self)
        DBTest.setUp(self)

    def tearDown(self):
        DBTest.tearDown(self)
        tsearch2Opener.tearDown(self)

    def testFilteringIntervalSort(self):
        # PostgreSQL (which tsearch2 runs on) sorts NULLs differently to
        # other databases (others treat it as lower than real values, PG
        # treats it as higher)
        ae, filt = self.filteringSetup()
        # ascending should sort None, 1:10, 1d
        ae(filt(None, {}, ('+','foo'), (None,None)), ['4', '1', '2', '3'])
        # descending should sort 1d, 1:10, None
        ae(filt(None, {}, ('-','foo'), (None,None)), ['3', '2', '1', '4'])

    def testTransactions(self):
        # XXX: in its current form, this test doesn't make sense for tsearch2.
        # It tests the transactions mechanism by counting the number of files
        # in the FileStorage. As tsearch2 doesn't use the FileStorage, this
        # fails. The test should probably be rewritten with some other way of
        # checking rollbacks/commits.
        pass

class tsearch2ROTest(tsearch2Opener, ROTest):
    def setUp(self):
        tsearch2Opener.setUp(self)
        ROTest.setUp(self)

    def tearDown(self):
        ROTest.tearDown(self)
        tsearch2Opener.tearDown(self)

class tsearch2SchemaTest(tsearch2Opener, SchemaTest):
    def setUp(self):
        tsearch2Opener.setUp(self)
        SchemaTest.setUp(self)

    def tearDown(self):
        SchemaTest.tearDown(self)
        tsearch2Opener.tearDown(self)

class tsearch2ClassicInitTest(tsearch2Opener, ClassicInitTest):
    backend = 'tsearch2'
    def setUp(self):
        tsearch2Opener.setUp(self)
        ClassicInitTest.setUp(self)

    def tearDown(self):
        ClassicInitTest.tearDown(self)
        tsearch2Opener.tearDown(self)

from session_common import RDBMSTest
class tsearch2SessionTest(tsearch2Opener, RDBMSTest):
    def setUp(self):
        tsearch2Opener.setUp(self)
        RDBMSTest.setUp(self)
    def tearDown(self):
        RDBMSTest.tearDown(self)
        tsearch2Opener.tearDown(self)

def test_suite():
    """Build the tsearch2 suite; it is empty when the backend is missing."""
    suite = unittest.TestSuite()
    if not have_backend('tsearch2'):
        # single-argument parenthesised form prints the same text on
        # Python 2 and Python 3
        print("Skipping tsearch2 tests")
        return suite

    # make sure we start with a clean slate
    if tsearch2Opener.module.db_exists(config):
        tsearch2Opener.module.db_nuke(config, 1)

    # TODO: Check if we can run postgresql tests
    print('Including tsearch2 tests')
    for case in (tsearch2DBTest, tsearch2ROTest, tsearch2SchemaTest,
                 tsearch2ClassicInitTest, tsearch2SessionTest):
        suite.addTest(unittest.makeSuite(case))
    return suite

# vim: set et sts=4 sw=4 :
