view roundup/backends/back_tsearch2.py @ 3992:fe2af84a5ca5

allow binary data for "content" props through rawToHyperdb
author Richard Jones <richard@users.sourceforge.net>
date Mon, 18 Aug 2008 06:21:53 +0000
parents a8c2371f45b6
children 6e3e4f24c753
line wrap: on
line source

#$Id: back_tsearch2.py,v 1.9 2005-01-08 16:16:59 jlgijsbers Exp $

# Note: this backend is EXPERIMENTAL. Do not use if you value your data.
import re

import psycopg

from roundup import hyperdb
from roundup.support import ensureParentsExist
from roundup.backends import back_postgresql, tsearch2_setup, indexer_rdbms
from roundup.backends.back_postgresql import db_create, db_nuke, db_command
from roundup.backends.back_postgresql import pg_command, db_exists, Class, IssueClass, FileClass
from roundup.backends.indexer_common import _isLink, Indexer

# XXX: Should probably be on the Class class.
def _indexedProps(spec):
    """Get a list of properties to be indexed on 'spec'."""
    return [prop for prop, propclass in spec.getprops().items()
            if isinstance(propclass, hyperdb.String) and propclass.indexme]

def _getQueryDict(spec):
    """Build the substitution mapping used by the tsearch2 SQL templates.

    The returned dictionary has the keys 'classname', 'indexedColumns'
    (the indexed property names, each prefixed with an underscore),
    'tablename' and 'triggername'.
    """
    names = {'classname': spec.classname}

    columns = []
    for prop in _indexedProps(spec):
        columns.append('_' + prop)
    names['indexedColumns'] = columns

    # The trigger name is derived from the table name, so the table name
    # has to be in the mapping first.
    names['tablename'] = "_%(classname)s" % names
    names['triggername'] = "%(tablename)s_tsvectorupdate" % names
    return names

class Database(back_postgresql.Database):
    """PostgreSQL database that maintains a tsearch2 full-text index.

    Every class table gets an extra 'idxFTI' tsvector column (see
    determine_all_columns) with a GiST index on it.  Classes that have
    indexed String properties also get a trigger that keeps the column
    up to date on INSERT and UPDATE.
    """

    def __init__(self, config, journaltag=None):
        """Initialise the parent database and attach the tsearch2 indexer."""
        back_postgresql.Database.__init__(self, config, journaltag)
        # 'Indexer' is looked up when this runs, i.e. after the module has
        # rebound the name to the subclass defined further down.
        self.indexer = Indexer(self)

    def create_version_2_tables(self):
        """Create the parent's tables, then run the tsearch2 setup."""
        back_postgresql.Database.create_version_2_tables(self)
        tsearch2_setup.setup(self.cursor)

    def create_class_table_indexes(self, spec):
        """Create the parent's indexes plus the GiST index and the trigger."""
        back_postgresql.Database.create_class_table_indexes(self, spec)
        self.cursor.execute("""CREATE INDEX _%(classname)s_idxFTI_idx
                               ON %(tablename)s USING gist(idxFTI);""" %
                            _getQueryDict(spec))

        self.create_tsearch2_trigger(spec)

    def create_tsearch2_trigger(self, spec):
        """Populate idxFTI for existing rows and install the update trigger.

        Nothing is done when 'spec' has no indexed String properties.
        """
        d = _getQueryDict(spec)
        if not d['indexedColumns']:
            return

        # In PostgreSQL "a || b" is NULL as soon as one operand is NULL, so
        # a single unset column would blank the whole vector for that row.
        # coalesce() lets the remaining columns still be indexed.
        d['joined'] = " || ' ' || ".join(
            ["coalesce(%s, '')" % column for column in d['indexedColumns']])
        query = """UPDATE %(tablename)s
                   SET idxFTI = to_tsvector('default', %(joined)s)""" % d
        self.cursor.execute(query)

        # The trigger procedure takes the plain column names as arguments.
        d['joined'] = ", ".join(d['indexedColumns'])
        query = """CREATE TRIGGER %(triggername)s
                   BEFORE UPDATE OR INSERT ON %(tablename)s
                   FOR EACH ROW EXECUTE PROCEDURE
                   tsearch2(idxFTI, %(joined)s);""" % d
        self.cursor.execute(query)

    def drop_tsearch2_trigger(self, spec):
        """Drop the tsearch2 trigger of 'spec' if it currently exists."""
        # Check whether the trigger exists before trying to drop it.
        query_dict = _getQueryDict(spec)
        self.sql("""SELECT tgname FROM pg_catalog.pg_trigger
                    WHERE tgname = '%(triggername)s'""" % query_dict)
        if self.cursor.fetchall():
            self.sql("""DROP TRIGGER %(triggername)s ON %(tablename)s""" %
                     query_dict)

    def update_class(self, spec, old_spec, force=0):
        """Run the parent's update, then rebuild the tsearch2 trigger.

        Returns whatever the parent's update_class returned.
        """
        result = back_postgresql.Database.update_class(self, spec, old_spec, force)

        # Drop trigger...
        self.drop_tsearch2_trigger(spec)

        # and recreate if necessary.
        self.create_tsearch2_trigger(spec)

        return result

    def determine_all_columns(self, spec):
        """Return the parent's (cols, mls) with the idxFTI column appended."""
        cols, mls = back_postgresql.Database.determine_all_columns(self, spec)
        cols.append(('idxFTI', 'tsvector'))
        return cols, mls
        
class Indexer(Indexer):
    """Indexer that answers searches from the tsearch2 'idxFTI' columns.

    The index itself is maintained inside the database, so the methods
    that would normally feed or rebuild an index are no-ops here.
    """

    def __init__(self, db):
        self.db = db

    # This indexer never needs to reindex.
    def should_reindex(self):
        return 0

    def getHits(self, search_terms, klass):
        """Alias for find()."""
        return self.find(search_terms, klass)

    def find(self, search_terms, klass):
        """Search 'klass' and every class it links to for 'search_terms'.

        Returns None when 'search_terms' is empty, otherwise a list of
        (classname, nodeid, propname) tuples as built by tsearchQuery.
        """
        if not search_terms:
            return None

        hits = self.tsearchQuery(klass.classname, search_terms)

        for propclass in klass.getprops().values():
            if _isLink(propclass):
                hits.extend(self.tsearchQuery(propclass.classname, search_terms))

        return hits

    def tsearchQuery(self, classname, search_terms):
        """Return the nodes of 'classname' matching all of 'search_terms'.

        The terms are AND-ed together into one tsquery.  The result is a
        list of (classname, nodeid, 'XXX') tuples.
        """
        # A table name cannot be sent as a bound parameter, so it is
        # interpolated here; the doubled '%%s' survives the interpolation
        # as the '%s' placeholder that the database driver fills in.  The
        # search terms come from the user and must never be spliced into
        # the SQL text themselves.
        query = """SELECT id FROM _%s
                   WHERE idxFTI @@ to_tsquery('default', %%s)""" % classname

        self.db.cursor.execute(query, (' & '.join(search_terms),))
        klass = self.db.getclass(classname)
        nodeids = [str(row[0]) for row in self.db.cursor.fetchall()]

        # filter out files without text/plain mime type
        # XXX: files without text/plain shouldn't be indexed at all, we
        # should take care of this in the trigger
        if 'type' in klass.getprops():
            nodeids = [nodeid for nodeid in nodeids
                       if klass.get(nodeid, 'type') == 'text/plain']

        # XXX: We haven't implemented property-level search, so I'm just faking
        # it here with a property named 'XXX'. We still need to fix the other
        # backends and indexer_common.Indexer.search to only want to unpack two
        # values.
        return [(classname, nodeid, 'XXX') for nodeid in nodeids]

    # These only exist to satisfy the interface that's expected from indexers.
    def force_reindex(self):
        pass

    def add_text(self, identifier, text, mime_type=None):
        pass

    def close(self):
        pass

class FileClass(hyperdb.FileClass, Class):
    '''This class defines a large chunk of data. To support this, it has a
       mandatory String property "content" which is typically saved off
       externally to the hyperdb.

       However, this implementation just stores it in the hyperdb.
    '''
    def __init__(self, db, classname, **properties):
        '''The newly-created class automatically includes the "content"
           property, which is flagged for indexing.
        '''
        properties['content'] = hyperdb.String(indexme='yes')
        Class.__init__(self, db, classname, **properties)

    # Mime type given to new nodes when the caller does not supply one.
    default_mime_type = 'text/plain'

    def create(self, **propvalues):
        '''Create a node, filling in the default mime type if the class has
           a "type" property and no (true) value was passed for it.
        '''
        # figure the mime type
        if 'type' in self.getprops() and not propvalues.get('type'):
            propvalues['type'] = self.default_mime_type
        return Class.create(self, **propvalues)

    def export_files(self, dirname, nodeid):
        '''Write the "content" of node 'nodeid' to its export file below
           'dirname', creating parent directories as needed.
        '''
        dest = self.exportFilename(dirname, nodeid)
        ensureParentsExist(dest)
        fp = open(dest, "w")
        try:
            fp.write(self.get(nodeid, "content", default=''))
        finally:
            # Release the handle even if fetching or writing the content fails.
            fp.close()

    def import_files(self, dirname, nodeid):
        '''Load the "content" of node 'nodeid' from its export file below
           'dirname' and store it in the database.
        '''
        source = self.exportFilename(dirname, nodeid)

        fp = open(source, "r")
        try:
            content = fp.read()
        finally:
            # Release the handle even if reading fails.
            fp.close()
        # Use Database.setnode instead of self.set or self.set_inner here, as
        # Database.setnode doesn't update the "activity" or "actor" properties.
        self.db.setnode(self.classname, nodeid, values={'content': content})

Roundup Issue Tracker: http://roundup-tracker.org/