comparison roundup/backends/indexer_sqlite_fts.py @ 6588:91ab3e0ffcd0

Summary: Add test cases for sqlite fts Add support for using the FTS5 full text query engine for sqlite. Also stubbed out some sections for adding postgresql FTS support as well. Added nee indexer type native-fts. It is not selected by default. The indexer=native is used if no indexer is set. This prevents an upgrade from seeming to wipe out the native index if upgraded and indexer=native is not explicitly set. Docs updated. Also changed section headers to sentence case for the current release notes. Indexing backend can control if the full text search phrase is broken into a list of words or passed intact. For backends with query languages (sqlite and can be enabled for whoosh and xapian) we do not want the phrase "tokenized" on whitespace. This also updates the rdbms database version to version 7 to add FTS table. I will be using the same version when I add postgresql. If somebody runs this version on postgresql, they will have to manually add the fts tables for postgresql if they want to use it. Added a new renderError method to client. This allows errors to be reported still using page.html rather than raw html. It also supports templates for any error code. If no template for the error code (e.g. 400) is found, the error in raw html with no page frame is shown. New IndexerQueryError exception to pass back message about query syntax errors.
author John Rouillard <rouilj@ieee.org>
date Sun, 23 Jan 2022 18:57:45 -0500
parents
children 9ff091537f43
comparison
equal deleted inserted replaced
6587:4f8fc55730e1 6588:91ab3e0ffcd0
1 """ This implements the full-text indexer using fts5 in sqlite.
2 The table consists of (Class, propname, itemid) instances as columns
3 along with a textblob column. The textblob column is searched using
4 MATCH and the instances returned.
5
6 sqlite test commands to manage schema version change required by
7 this update.
8
9 -- check length before and after
10 select length(schema) from schema;
11
12 -- reset from version 7 (with fts index) to version 6
13 update schema set schema = (select replace(schema,
14 '''version'': 7','''version'': 6') as new_schema from schema);
15
16 -- check version. Good thing it's at the front of the schema
17 select substr(schema,0,15) from schema;
18 {'version': 6,
19 """
20
21 from roundup.backends.indexer_common import Indexer as IndexerBase
22 from roundup.i18n import _
23 from roundup.cgi.exceptions import IndexerQueryError
24
25 try:
26 import sqlite3 as sqlite
27 if sqlite.sqlite_version_info < (3, 9, 0):
28 raise ValueError('sqlite minimum version for FTS5 is 3.9.0+ '
29 '- %s found' % sqlite.sqlite_version)
30 except ImportError:
31 raise ValueError('Unable to import sqlite3 to support FTS.')
32
33
34 class Indexer(IndexerBase):
35 def __init__(self, db):
36 IndexerBase.__init__(self, db)
37 self.db = db
38 self.reindex = 0
39 self.query_language = True
40
41 def close(self):
42 """close the indexing database"""
43 # just nuke the circular reference
44 self.db = None
45
46 def save_index(self):
47 """Save the changes to the index."""
48 # not necessary - the RDBMS connection will handle this for us
49 pass
50
51 def force_reindex(self):
52 """Force a reindexing of the database. This essentially
53 empties the __fts table and sets a flag so
54 that the databases are reindexed"""
55 self.reindex = 1
56
57 def should_reindex(self):
58 """returns True if the indexes need to be rebuilt"""
59 return self.reindex
60
61 def add_text(self, identifier, text, mime_type='text/plain'):
62 """ "identifier" is (classname, itemid, property) """
63 if mime_type != 'text/plain':
64 return
65
66 # Ensure all elements of the identifier are strings 'cos the itemid
67 # column is varchar even if item ids may be numbers elsewhere in the
68 # code. ugh.
69 identifier = tuple(map(str, identifier))
70
71 # removed pre-processing of text that incudes only words with:
72 # self.minlength <= len(word) <= self.maxlength
73 # Not sure if that is correct.
74
75 # first, find the rowid of the (classname, itemid, property)
76 a = self.db.arg # arg is the token for positional parameters
77 sql = 'select rowid from __fts where _class=%s and '\
78 '_itemid=%s and _prop=%s' % (a, a, a)
79 self.db.cursor.execute(sql, identifier)
80 r = self.db.cursor.fetchone()
81 if not r:
82 # not previously indexed
83 sql = 'insert into __fts (_class, _itemid, _prop, _textblob)'\
84 ' values (%s, %s, %s, %s)' % (a, a, a, a)
85 self.db.cursor.execute(sql, identifier + (text,))
86 else:
87 id = int(r[0])
88 sql = 'update __fts set _textblob=%s where rowid=%s' % \
89 (a, a)
90 self.db.cursor.execute(sql, (text, id))
91
92 def find(self, wordlist):
93 """look up all the words in the wordlist.
94 For testing wordlist is actually a list.
95 In production, wordlist is a list of a single string
96 that is a sqlite MATCH query.
97
98 https://www.sqlite.org/fts5.html#full_text_query_syntax
99 """
100 if not wordlist:
101 return []
102
103 a = self.db.arg # arg is the token for positional parameters
104
105 # removed filtering of word in wordlist to include only
106 # words with: self.minlength <= len(word) <= self.maxlength
107
108 sql = 'select _class, _itemid, _prop from __fts '\
109 'where _textblob MATCH %s' % a
110
111 try:
112 # tests supply a multi element word list. Join them.
113 self.db.cursor.execute(sql, (" ".join(wordlist),))
114 except sqlite.OperationalError as e:
115 if 'no such column' in e.args[0]:
116 raise IndexerQueryError(
117 _("Search failed. Try quoting any terms that "
118 "include a '-' and retry the search."))
119 else:
120 raise IndexerQueryError(e.args[0].replace("fts5:",
121 "Query error:"))
122
123 return self.db.cursor.fetchall()

Roundup Issue Tracker: http://roundup-tracker.org/