comparison test/test_indexer.py @ 6588:91ab3e0ffcd0

Summary: Add test cases for sqlite fts. Add support for using the FTS5 full text query engine for sqlite. Also stubbed out some sections for adding postgresql FTS support as well. Added new indexer type native-fts. It is not selected by default. The indexer=native is used if no indexer is set. This prevents an upgrade from seeming to wipe out the native index if upgraded and indexer=native is not explicitly set. Docs updated. Also changed section headers to sentence case for the current release notes. Indexing backend can control if the full text search phrase is broken into a list of words or passed intact. For backends with query languages (sqlite; can also be enabled for whoosh and xapian) we do not want the phrase "tokenized" on whitespace. This also updates the rdbms database version to version 7 to add FTS table. I will be using the same version when I add postgresql. If somebody runs this version on postgresql, they will have to manually add the fts tables for postgresql if they want to use it. Added a new renderError method to client. This allows errors to be reported still using page.html rather than raw html. It also supports templates for any error code. If no template for the error code (e.g. 400) is found, the error is shown in raw html with no page frame. New IndexerQueryError exception to pass back message about query syntax errors.
author John Rouillard <rouilj@ieee.org>
date Sun, 23 Jan 2022 18:57:45 -0500
parents c26b9ce33ae3
children 39189dd94f2c
comparison
equal deleted inserted replaced
6587:4f8fc55730e1 6588:91ab3e0ffcd0
22 22
23 import pytest 23 import pytest
24 from roundup.backends import get_backend, have_backend 24 from roundup.backends import get_backend, have_backend
25 from roundup.backends.indexer_rdbms import Indexer 25 from roundup.backends.indexer_rdbms import Indexer
26 26
27 from roundup.cgi.exceptions import IndexerQueryError
28
27 # borrow from other tests 29 # borrow from other tests
28 from .db_test_base import setupSchema, config 30 from .db_test_base import setupSchema, config
29 from .test_postgresql import postgresqlOpener, skip_postgresql 31 from .test_postgresql import postgresqlOpener, skip_postgresql
30 from .test_mysql import mysqlOpener, skip_mysql 32 from .test_mysql import mysqlOpener, skip_mysql
31 from .test_sqlite import sqliteOpener 33 from .test_sqlite import sqliteOpener
113 self.assertSeqEqual(self.dex.find(['with','world']), 115 self.assertSeqEqual(self.dex.find(['with','world']),
114 [('test', '1', 'bar'), 116 [('test', '1', 'bar'),
115 ('test', '2', 'bar')]) 117 ('test', '2', 'bar')])
116 def test_extremewords(self): 118 def test_extremewords(self):
117 """Testing too short or too long words.""" 119 """Testing too short or too long words."""
120
121 # skip this for FTS test
122 if isinstance(self,sqliteFtsIndexerTest):
123 pytest.skip("extremewords not tested for native FTS backends")
124
118 short = "b" 125 short = "b"
119 long = "abcdefghijklmnopqrstuvwxyz" 126 long = "abcdefghijklmnopqrstuvwxyz"
120 self.dex.add_text(('test', '1', 'a'), '%s hello world' % short) 127 self.dex.add_text(('test', '1', 'a'), '%s hello world' % short)
121 self.dex.add_text(('test', '2', 'a'), 'blah a %s world' % short) 128 self.dex.add_text(('test', '2', 'a'), 'blah a %s world' % short)
122 self.dex.add_text(('test', '3', 'a'), 'blah Blub river') 129 self.dex.add_text(('test', '3', 'a'), 'blah Blub river')
217 RDBMSIndexerTest.setUp(self) 224 RDBMSIndexerTest.setUp(self)
218 def tearDown(self): 225 def tearDown(self):
219 RDBMSIndexerTest.tearDown(self) 226 RDBMSIndexerTest.tearDown(self)
220 postgresqlOpener.tearDown(self) 227 postgresqlOpener.tearDown(self)
221 228
# Stubbed-out PostgreSQL FTS indexer test. It is wrapped in a bare string
# literal so the class is never defined or collected; as written it mirrors
# the setUp/tearDown pairing of the postgresql opener and RDBMS fixtures.
"""
@skip_postgresql
class postgresqlFtsIndexerTest(postgresqlOpener, RDBMSIndexerTest, IndexerTest):
    def setUp(self):
        postgresqlOpener.setUp(self)
        RDBMSIndexerTest.setUp(self)
    def tearDown(self):
        RDBMSIndexerTest.tearDown(self)
        postgresqlOpener.tearDown(self)
"""
222 239
223 @skip_mysql 240 @skip_mysql
224 class mysqlIndexerTest(mysqlOpener, RDBMSIndexerTest, IndexerTest): 241 class mysqlIndexerTest(mysqlOpener, RDBMSIndexerTest, IndexerTest):
225 def setUp(self): 242 def setUp(self):
226 mysqlOpener.setUp(self) 243 mysqlOpener.setUp(self)
231 248
232 249
class sqliteIndexerTest(sqliteOpener, RDBMSIndexerTest, IndexerTest):
    """Combine sqliteOpener, RDBMSIndexerTest and IndexerTest unchanged.

    The class adds no methods of its own; everything is inherited.
    """
235 252
class sqliteFtsIndexerTest(sqliteOpener, RDBMSIndexerTest, IndexerTest):
    """Indexer tests run against the sqlite FTS indexer backend.

    Besides the inherited tests, this class checks query-language
    features: phrases, NEAR, prefix matching, boolean operators, the
    ``^`` start-of-text marker and query syntax error reporting.
    """

    def setUp(self):
        # Let the RDBMS fixture run first, then replace self.dex with an
        # FTS indexer that is pointed at the fixture's database.
        RDBMSIndexerTest.setUp(self)
        from roundup.backends import indexer_sqlite_fts
        fts_indexer = indexer_sqlite_fts.Indexer(db)
        fts_indexer.db = self.db
        self.dex = fts_indexer

    def _fts_index_corpus(self, first_text='a the hello world'):
        """Index the four 'foo' documents shared by the FTS tests.

        Only the text of document 1 differs between tests, so it is
        the single parameter.
        """
        corpus = (
            ('1', first_text),
            ('2', 'helh blah blah the world'),
            ('3', 'blah hello the world'),
            ('4', 'hello blah blech the world'),
        )
        for nodeid, text in corpus:
            self.dex.add_text(('test', nodeid, 'foo'), text)

    @staticmethod
    def _fts_hits(*nodeids):
        """Return the expected find() result list for the given node ids."""
        return [('test', nodeid, 'foo') for nodeid in nodeids]

    def test_phrase_and_near(self):
        self._fts_index_corpus()
        search = self.dex.find

        # sanity check first: two independently quoted words
        self.assertSeqEqual(search(['"hello" "world"']),
                            self._fts_hits('1', '3', '4'))

        # the quoted phrase only matches adjacent words
        self.assertSeqEqual(search(['"hello world"']),
                            self._fts_hits('1'))

        # NEAR with an explicit count of 0, 1 and 3 intervening items
        near_cases = (
            ('NEAR(hello world, 0)', ('1',)),
            ('NEAR(hello world, 1)', ('1', '3')),
            ('NEAR(hello world, 3)', ('1', '3', '4')),
        )
        for query, nodeids in near_cases:
            self.assertSeqEqual(search([query]), self._fts_hits(*nodeids))

    def test_prefix(self):
        self._fts_index_corpus()

        # every document contains a word starting with "hel"
        self.assertSeqEqual(self.dex.find(['hel*']),
                            self._fts_hits('1', '2', '3', '4'))

    def test_bool_start(self):
        self._fts_index_corpus()

        # (query, expected node ids), checked in this order
        cases = (
            ('hel* NOT helh NOT blech', ('1', '3')),
            ('hel* NOT helh NOT blech OR the', ('1', '2', '3', '4')),
            ('helh OR hello', ('1', '2', '3', '4')),
            ('helh AND hello', ()),
            # ^ only matches when the text starts with hello
            ('^hello', ('4',)),
            ('hello', ('1', '3', '4')),
        )
        for query, nodeids in cases:
            self.assertSeqEqual(self.dex.find([query]),
                                self._fts_hits(*nodeids))

    def test_query_errors(self):
        """Check queries that raise IndexerQueryError and the suggested
           fix for the hyphenated term."""

        self._fts_index_corpus('a the hello-world')

        # an unquoted hyphenated term is an error roundup diagnoses
        with self.assertRaises(IndexerQueryError) as caught:
            self.dex.find(['the hello-world'])

        expected_msg = ("Search failed. Try quoting any terms that "
                        "include a '-' and retry the search.")
        self.assertEqual(str(caught.exception), expected_msg)

        # quoting the term, as the message suggests, makes the search work
        self.assertSeqEqual(self.dex.find(['the "hello-world"']),
                            self._fts_hits('1'))

        # any other syntax error is passed back with its location
        with self.assertRaises(IndexerQueryError) as caught:
            self.dex.find(['hello world + ^the'])

        expected_msg = 'Query error: syntax error near "^"'
        self.assertEqual(str(caught.exception), expected_msg)
377
236 # vim: set filetype=python ts=4 sw=4 et si 378 # vim: set filetype=python ts=4 sw=4 et si

Roundup Issue Tracker: http://roundup-tracker.org/