annotate roundup/backends/indexer_sqlite_fts.py @ 8395:c7a2e01793cd 2.5.0

build: 2.5.0 release checkin. Tag to come.
author John Rouillard <rouilj@ieee.org>
date Sat, 12 Jul 2025 23:22:14 -0400
parents 9ff091537f43
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6588
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
1 """ This implements the full-text indexer using fts5 in sqlite.
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
2 The table consists of (Class, propname, itemid) instances as columns
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
3 along with a textblob column. The textblob column is searched using
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
4 MATCH and the instances returned.
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
5
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
6 sqlite test commands to manage schema version change required by
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
7 this update.
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
8
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
9 -- check length before and after
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
10 select length(schema) from schema;
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
11
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
12 -- reset from version 7 (with fts index) to version 6
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
13 update schema set schema = (select replace(schema,
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
14 '''version'': 7','''version'': 6') as new_schema from schema);
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
15
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
16 -- check version. Good thing it's at the front of the schema
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
17 select substr(schema,0,15) from schema;
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
18 {'version': 6,
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
19 """
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
20
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
21 from roundup.backends.indexer_common import Indexer as IndexerBase
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
22 from roundup.i18n import _
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
23 from roundup.cgi.exceptions import IndexerQueryError
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
24
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
25 try:
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
26 import sqlite3 as sqlite
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
27 if sqlite.sqlite_version_info < (3, 9, 0):
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
28 raise ValueError('sqlite minimum version for FTS5 is 3.9.0+ '
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
29 '- %s found' % sqlite.sqlite_version)
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
30 except ImportError:
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
31 raise ValueError('Unable to import sqlite3 to support FTS.')
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
32
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
33
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
34 class Indexer(IndexerBase):
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
35 def __init__(self, db):
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
36 IndexerBase.__init__(self, db)
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
37 self.db = db
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
38 self.reindex = 0
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
39 self.query_language = True
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
40
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
41 def close(self):
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
42 """close the indexing database"""
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
43 # just nuke the circular reference
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
44 self.db = None
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
45
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
46 def save_index(self):
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
47 """Save the changes to the index."""
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
48 # not necessary - the RDBMS connection will handle this for us
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
49 pass
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
50
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
51 def force_reindex(self):
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
52 """Force a reindexing of the database. This essentially
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
53 empties the __fts table and sets a flag so
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
54 that the databases are reindexed"""
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
55 self.reindex = 1
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
56
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
57 def should_reindex(self):
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
58 """returns True if the indexes need to be rebuilt"""
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
59 return self.reindex
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
60
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
61 def add_text(self, identifier, text, mime_type='text/plain'):
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
62 """ "identifier" is (classname, itemid, property) """
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
63 if mime_type != 'text/plain':
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
64 return
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
65
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
66 # Ensure all elements of the identifier are strings 'cos the itemid
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
67 # column is varchar even if item ids may be numbers elsewhere in the
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
68 # code. ugh.
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
69 identifier = tuple(map(str, identifier))
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
70
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
71 # removed pre-processing of text that incudes only words with:
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
72 # self.minlength <= len(word) <= self.maxlength
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
73 # Not sure if that is correct.
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
74
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
75 # first, find the rowid of the (classname, itemid, property)
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
76 a = self.db.arg # arg is the token for positional parameters
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
77 sql = 'select rowid from __fts where _class=%s and '\
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
78 '_itemid=%s and _prop=%s' % (a, a, a)
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
79 self.db.cursor.execute(sql, identifier)
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
80 r = self.db.cursor.fetchone()
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
81 if not r:
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
82 # not previously indexed
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
83 sql = 'insert into __fts (_class, _itemid, _prop, _textblob)'\
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
84 ' values (%s, %s, %s, %s)' % (a, a, a, a)
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
85 self.db.cursor.execute(sql, identifier + (text,))
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
86 else:
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
87 id = int(r[0])
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
88 sql = 'update __fts set _textblob=%s where rowid=%s' % \
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
89 (a, a)
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
90 self.db.cursor.execute(sql, (text, id))
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
91
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
92 def find(self, wordlist):
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
93 """look up all the words in the wordlist.
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
94 For testing wordlist is actually a list.
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
95 In production, wordlist is a list of a single string
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
96 that is a sqlite MATCH query.
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
97
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
98 https://www.sqlite.org/fts5.html#full_text_query_syntax
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
99 """
6915
9ff091537f43 postgresql native-fts; more indexer tests
John Rouillard <rouilj@ieee.org>
parents: 6588
diff changeset
100
9ff091537f43 postgresql native-fts; more indexer tests
John Rouillard <rouilj@ieee.org>
parents: 6588
diff changeset
101 # Filter out stopwords. Other searches tokenize the user query
9ff091537f43 postgresql native-fts; more indexer tests
John Rouillard <rouilj@ieee.org>
parents: 6588
diff changeset
102 # into an list of simple word tokens. For fTS, query
9ff091537f43 postgresql native-fts; more indexer tests
John Rouillard <rouilj@ieee.org>
parents: 6588
diff changeset
103 # tokenization doesn't occur.
9ff091537f43 postgresql native-fts; more indexer tests
John Rouillard <rouilj@ieee.org>
parents: 6588
diff changeset
104
9ff091537f43 postgresql native-fts; more indexer tests
John Rouillard <rouilj@ieee.org>
parents: 6588
diff changeset
105 # A user's FTS query is a wordlist with one element. The
9ff091537f43 postgresql native-fts; more indexer tests
John Rouillard <rouilj@ieee.org>
parents: 6588
diff changeset
106 # element is a string to parse and will probably not match a
9ff091537f43 postgresql native-fts; more indexer tests
John Rouillard <rouilj@ieee.org>
parents: 6588
diff changeset
107 # stop word.
9ff091537f43 postgresql native-fts; more indexer tests
John Rouillard <rouilj@ieee.org>
parents: 6588
diff changeset
108 #
9ff091537f43 postgresql native-fts; more indexer tests
John Rouillard <rouilj@ieee.org>
parents: 6588
diff changeset
109 # However the generic indexer search tests pass in a list of
9ff091537f43 postgresql native-fts; more indexer tests
John Rouillard <rouilj@ieee.org>
parents: 6588
diff changeset
110 # word tokens. We filter the word tokens so it behaves like
9ff091537f43 postgresql native-fts; more indexer tests
John Rouillard <rouilj@ieee.org>
parents: 6588
diff changeset
111 # other backends. This means that a search for a simple word
9ff091537f43 postgresql native-fts; more indexer tests
John Rouillard <rouilj@ieee.org>
parents: 6588
diff changeset
112 # like 'the' (without quotes) will return no hits, as the test
9ff091537f43 postgresql native-fts; more indexer tests
John Rouillard <rouilj@ieee.org>
parents: 6588
diff changeset
113 # expects.
9ff091537f43 postgresql native-fts; more indexer tests
John Rouillard <rouilj@ieee.org>
parents: 6588
diff changeset
114 wordlist = [w for w in wordlist if not self.is_stopword(w.upper())]
9ff091537f43 postgresql native-fts; more indexer tests
John Rouillard <rouilj@ieee.org>
parents: 6588
diff changeset
115
6588
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
116 if not wordlist:
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
117 return []
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
118
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
119 a = self.db.arg # arg is the token for positional parameters
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
120
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
121 # removed filtering of word in wordlist to include only
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
122 # words with: self.minlength <= len(word) <= self.maxlength
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
123
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
124 sql = 'select _class, _itemid, _prop from __fts '\
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
125 'where _textblob MATCH %s' % a
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
126
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
127 try:
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
128 # tests supply a multi element word list. Join them.
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
129 self.db.cursor.execute(sql, (" ".join(wordlist),))
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
130 except sqlite.OperationalError as e:
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
131 if 'no such column' in e.args[0]:
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
132 raise IndexerQueryError(
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
133 _("Search failed. Try quoting any terms that "
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
134 "include a '-' and retry the search."))
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
135 else:
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
136 raise IndexerQueryError(e.args[0].replace("fts5:",
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
137 "Query error:"))
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
138
91ab3e0ffcd0 Summary: Add test cases for sqlite fts
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
139 return self.db.cursor.fetchall()

Roundup Issue Tracker: http://roundup-tracker.org/