Mercurial > p > roundup > code
view roundup/backends/indexer_sqlite_fts.py @ 8543:1ffa1f42e1da
refactor: rework mime type comparison and clean code
rest.py:
accept application/* as match for application/json in non
/binary_context rest path.
allow defining default mime type to return when file/message is
missing mime type. Make it a class variable to it can be changed from
text/plain to text/markdown or whatever.
extract code from determine_output_format() to create
create_valid_content_types() method which returns a list of matching
mime types for a given type/subtype.
Eliminate mostly duplicate return statements by introducing a variable
to specify valid mime types in error message.
rest_common.py:
Fix error messages that now return application/* as valid mime type.
CHANGES.txt upgrading.txt rest.txt:
top level notes and corrections.
Also correct rst syntax on earlier change.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Tue, 24 Mar 2026 21:30:47 -0400 |
| parents | 9ff091537f43 |
| children |
line wrap: on
line source
""" This implements the full-text indexer using fts5 in sqlite. The table consists of (Class, propname, itemid) instances as columns along with a textblob column. The textblob column is searched using MATCH and the instances returned. sqlite test commands to manage schema version change required by this update. -- check length before and after select length(schema) from schema; -- reset from version 7 (with fts index) to version 6 update schema set schema = (select replace(schema, '''version'': 7','''version'': 6') as new_schema from schema); -- check version. Good thing it's at the front of the schema select substr(schema,0,15) from schema; {'version': 6, """ from roundup.backends.indexer_common import Indexer as IndexerBase from roundup.i18n import _ from roundup.cgi.exceptions import IndexerQueryError try: import sqlite3 as sqlite if sqlite.sqlite_version_info < (3, 9, 0): raise ValueError('sqlite minimum version for FTS5 is 3.9.0+ ' '- %s found' % sqlite.sqlite_version) except ImportError: raise ValueError('Unable to import sqlite3 to support FTS.') class Indexer(IndexerBase): def __init__(self, db): IndexerBase.__init__(self, db) self.db = db self.reindex = 0 self.query_language = True def close(self): """close the indexing database""" # just nuke the circular reference self.db = None def save_index(self): """Save the changes to the index.""" # not necessary - the RDBMS connection will handle this for us pass def force_reindex(self): """Force a reindexing of the database. This essentially empties the __fts table and sets a flag so that the databases are reindexed""" self.reindex = 1 def should_reindex(self): """returns True if the indexes need to be rebuilt""" return self.reindex def add_text(self, identifier, text, mime_type='text/plain'): """ "identifier" is (classname, itemid, property) """ if mime_type != 'text/plain': return # Ensure all elements of the identifier are strings 'cos the itemid # column is varchar even if item ids may be numbers elsewhere in the # code. ugh. identifier = tuple(map(str, identifier)) # removed pre-processing of text that incudes only words with: # self.minlength <= len(word) <= self.maxlength # Not sure if that is correct. # first, find the rowid of the (classname, itemid, property) a = self.db.arg # arg is the token for positional parameters sql = 'select rowid from __fts where _class=%s and '\ '_itemid=%s and _prop=%s' % (a, a, a) self.db.cursor.execute(sql, identifier) r = self.db.cursor.fetchone() if not r: # not previously indexed sql = 'insert into __fts (_class, _itemid, _prop, _textblob)'\ ' values (%s, %s, %s, %s)' % (a, a, a, a) self.db.cursor.execute(sql, identifier + (text,)) else: id = int(r[0]) sql = 'update __fts set _textblob=%s where rowid=%s' % \ (a, a) self.db.cursor.execute(sql, (text, id)) def find(self, wordlist): """look up all the words in the wordlist. For testing wordlist is actually a list. In production, wordlist is a list of a single string that is a sqlite MATCH query. https://www.sqlite.org/fts5.html#full_text_query_syntax """ # Filter out stopwords. Other searches tokenize the user query # into an list of simple word tokens. For fTS, query # tokenization doesn't occur. # A user's FTS query is a wordlist with one element. The # element is a string to parse and will probably not match a # stop word. # # However the generic indexer search tests pass in a list of # word tokens. We filter the word tokens so it behaves like # other backends. This means that a search for a simple word # like 'the' (without quotes) will return no hits, as the test # expects. wordlist = [w for w in wordlist if not self.is_stopword(w.upper())] if not wordlist: return [] a = self.db.arg # arg is the token for positional parameters # removed filtering of word in wordlist to include only # words with: self.minlength <= len(word) <= self.maxlength sql = 'select _class, _itemid, _prop from __fts '\ 'where _textblob MATCH %s' % a try: # tests supply a multi element word list. Join them. self.db.cursor.execute(sql, (" ".join(wordlist),)) except sqlite.OperationalError as e: if 'no such column' in e.args[0]: raise IndexerQueryError( _("Search failed. Try quoting any terms that " "include a '-' and retry the search.")) else: raise IndexerQueryError(e.args[0].replace("fts5:", "Query error:")) return self.db.cursor.fetchall()
