comparison test/test_indexer.py @ 6604:0d99ae7c8de6

Allow Roundup to use PostgreSQL database native full text search. back_postgresql.py - schema version changes for schema version 7. configuration.py - added indexer_language checks for postgresql; hardcoded list for now. Docs: admin_guide and upgrading. Tests. This also restructures the version upgrade tests for the rdbms backends. They can all be run now, as the proper cascade is developed to roll back changes to version 6.
author John Rouillard <rouilj@ieee.org>
date Thu, 27 Jan 2022 19:48:48 -0500
parents 39189dd94f2c
children 3260926d7e7e
comparison
equal deleted inserted replaced
6603:57dc15ad648d 6604:0d99ae7c8de6
117 ('test', '2', 'bar')]) 117 ('test', '2', 'bar')])
118 def test_extremewords(self): 118 def test_extremewords(self):
119 """Testing too short or too long words.""" 119 """Testing too short or too long words."""
120 120
121 # skip this for FTS test 121 # skip this for FTS test
122 if isinstance(self,sqliteFtsIndexerTest): 122 if ( isinstance(self,sqliteFtsIndexerTest) or
123 isinstance(self,postgresqlFtsIndexerTest)):
123 pytest.skip("extremewords not tested for native FTS backends") 124 pytest.skip("extremewords not tested for native FTS backends")
124 125
125 short = "b" 126 short = "b"
126 long = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" 127 long = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"
127 self.dex.add_text(('test', '1', 'a'), '%s hello world' % short) 128 self.dex.add_text(('test', '1', 'a'), '%s hello world' % short)
138 # but only considered length(word)>=3 to be significant 139 # but only considered length(word)>=3 to be significant
139 self.dex.add_text(('test', '5', 'a'), 'blah py %s %s' 140 self.dex.add_text(('test', '5', 'a'), 'blah py %s %s'
140 % (short, long)) 141 % (short, long))
141 self.assertSeqEqual(self.dex.find(["py"]), [('test', '5', 'a')]) 142 self.assertSeqEqual(self.dex.find(["py"]), [('test', '5', 'a')])
142 143
143 def test_casesensitity(self): 144 def test_casesensitivity(self):
144 """Test if searches are case-in-sensitive.""" 145 """Test if searches are case-in-sensitive."""
145 self.dex.add_text(('test', '1', 'a'), 'aaaa bbbb') 146 self.dex.add_text(('test', '1', 'a'), 'aaaa bbbb')
146 self.dex.add_text(('test', '2', 'a'), 'aAaa BBBB') 147 self.dex.add_text(('test', '2', 'a'), 'aAaa BBBB')
147 self.assertSeqEqual(self.dex.find(['aaaa']), 148 self.assertSeqEqual(self.dex.find(['aaaa']),
148 [('test', '1', 'a'), 149 [('test', '1', 'a'),
224 RDBMSIndexerTest.setUp(self) 225 RDBMSIndexerTest.setUp(self)
225 def tearDown(self): 226 def tearDown(self):
226 RDBMSIndexerTest.tearDown(self) 227 RDBMSIndexerTest.tearDown(self)
227 postgresqlOpener.tearDown(self) 228 postgresqlOpener.tearDown(self)
228 229
229 """ 230
230 @skip_postgresql 231 @skip_postgresql
231 class postgresqlFtsIndexerTest(postgresqlOpener, RDBMSIndexerTest, IndexerTest): 232 class postgresqlFtsIndexerTest(postgresqlOpener, RDBMSIndexerTest, IndexerTest):
232 def setUp(self): 233 def setUp(self):
233 postgresqlOpener.setUp(self) 234 postgresqlOpener.setUp(self)
234 RDBMSIndexerTest.setUp(self) 235 RDBMSIndexerTest.setUp(self)
236 from roundup.backends.indexer_postgresql_fts import Indexer
237 self.dex = Indexer(self.db)
238 self.dex.db = self.db
239
235 def tearDown(self): 240 def tearDown(self):
236 RDBMSIndexerTest.tearDown(self) 241 RDBMSIndexerTest.tearDown(self)
237 postgresqlOpener.tearDown(self) 242 postgresqlOpener.tearDown(self)
238 """ 243
244 def test_websearch_syntax(self):
245 """Test searches using websearch_to_tsquery. These never throw
246 errors regardless of how wacky the input.
247 """
248
249 self.dex.add_text(('test', '1', 'foo'), 'a the hello world')
250 self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world')
251 self.dex.add_text(('test', '3', 'foo'), 'blah hello the world')
252 self.dex.add_text(('test', '4', 'foo'), 'hello blah blech the world')
253 self.dex.add_text(('test', '5', 'foo'), 'a car drove')
254 self.dex.add_text(('test', '6', 'foo'), 'a car driving itself')
255 self.dex.add_text(('test', '7', 'foo'), "let's drive in the car")
256 self.dex.add_text(('test', '8', 'foo'), 'a drive-in movie')
257
258 # test two separate words for sanity
259 self.assertSeqEqual(self.dex.find(['"hello" "world"']),
260 [('test', '1', 'foo'),
261 ('test', '3', 'foo'),
262 ('test', '4', 'foo')
263 ])
264 # now check the phrase
265 self.assertSeqEqual(self.dex.find(['"hello world"']),
266 [('test', '1', 'foo'),
267 ])
268
269 # test negation
270 self.assertSeqEqual(self.dex.find(['hello world -blech']),
271 [('test', '1', 'foo'),
272 ('test', '3', 'foo'),
273 ])
274
275 # phrase negation
276 self.assertSeqEqual(self.dex.find(['hello world -"blah hello"']),
277 [('test', '1', 'foo'),
278 ('test', '4', 'foo'),
279 ])
280
281 # test without or
282 self.assertSeqEqual(self.dex.find(['blah blech']),
283 [('test', '4', 'foo'),
284 ])
285
286 # test with or
287 self.assertSeqEqual(self.dex.find(['blah or blech']),
288 [ ('test', '2', 'foo'),
289 ('test', '3', 'foo'),
290 ('test', '4', 'foo'),
291 ])
292
293 # stemmer test for english
294 self.assertSeqEqual(self.dex.find(['ts:drive']),
295 [('test', '6', 'foo'),
296 ('test', '7', 'foo'),
297 ('test', '8', 'foo')
298 ])
299
300 # stemmer is not disabled by quotes 8-(
301 self.assertSeqEqual(self.dex.find(['ts:"drive"']),
302 [('test', '6', 'foo'),
303 ('test', '7', 'foo'),
304 ('test', '8', 'foo')
305 ])
306
307
308 # this is missing ts: at the start, so uses the websearch
309 # parser. We search for operator characters and warn the user
310 # Otherwise "hello <-> world" is the same as "hello world"
311 # and is not a phrase search.
312 with self.assertRaises(IndexerQueryError) as ctx:
313 self.dex.find(['hello <-> world'])
314
315 self.assertIn('do a tsquery search', ctx.exception.args[0])
316
317 def test_tsquery_syntax(self):
318 """Because websearch_to_tsquery doesn't allow prefix searches,
319 near searches with any value except 1 (phrase search), allow
320 use of to_tsquery by prefixing the search term with ts:.
321
322 However, unlike websearch_to_tsquery, this will throw a
323 psycopg2.errors.SyntaxError on bad input. SyntaxError is
324 re-raised as IndexerQueryError. But it makes a bunch of
325 useful expert functionality available.
326
327 """
328
329 self.dex.add_text(('test', '1', 'foo'), 'a the hello world')
330 self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world')
331 self.dex.add_text(('test', '3', 'foo'), 'blah hello the world')
332 self.dex.add_text(('test', '4', 'foo'), 'hello blah blech the world')
333 self.dex.add_text(('test', '5', 'foo'), 'a car drove')
334 self.dex.add_text(('test', '6', 'foo'), 'a car driving itself')
335 self.dex.add_text(('test', '7', 'foo'), "let's drive in the car")
336 self.dex.add_text(('test', '8', 'foo'), 'a drive-in movie')
337 self.dex.db.commit()
338
339 # test two separate words for sanity
340 self.assertSeqEqual(self.dex.find(['ts:hello & world']),
341 [('test', '1', 'foo'),
342 ('test', '3', 'foo'),
343 ('test', '4', 'foo')
344 ])
345 # now check the phrase
346 self.assertSeqEqual(self.dex.find(['ts:hello <-> world']),
347 [('test', '1', 'foo'),
348 ])
349
350 # test negation
351 self.assertSeqEqual(self.dex.find(['ts:hello & world & !blech']),
352 [('test', '1', 'foo'),
353 ('test', '3', 'foo'),
354 ])
355
356 self.assertSeqEqual(self.dex.find(
357 ['ts:hello & world & !(blah <-> hello)']),
358 [('test', '1', 'foo'),
359 ('test', '4', 'foo'),
360 ])
361
362 # test without or
363 self.assertSeqEqual(self.dex.find(['ts:blah & blech']),
364 [('test', '4', 'foo'),
365 ])
366
367 # test with or
368 self.assertSeqEqual(self.dex.find(['ts:blah | blech']),
369 [ ('test', '2', 'foo'),
370 ('test', '3', 'foo'),
371 ('test', '4', 'foo'),
372 ])
373 # stemmer test for english
374 self.assertSeqEqual(self.dex.find(['ts:drive']),
375 [('test', '6', 'foo'),
376 ('test', '7', 'foo'),
377 ('test', '8', 'foo')
378 ])
379
380 # stemmer is not disabled by quotes 8-(
381 self.assertSeqEqual(self.dex.find(['ts:"drive"']),
382 [('test', '6', 'foo'),
383 ('test', '7', 'foo'),
384 ('test', '8', 'foo')
385 ])
386
387
388 # test with syntax error
389 with self.assertRaises(IndexerQueryError) as ctx:
390 self.dex.find(['ts:blah blech'])
391
392 self.assertEqual(ctx.exception.args[0],
393 'syntax error in tsquery: "blah blech"\n')
394
395 # now check the phrase. Note: unlike sqlite, order matters,
396 # hello must come first.
397 self.assertSeqEqual(self.dex.find(['ts:hello <-> world']),
398 [('test', '1', 'foo'),
399 ])
400
401 # now check the phrase with explicitly 1 intervening item
402 self.assertSeqEqual(self.dex.find(['ts:hello <2> world']),
403 [('test', '3', 'foo'),
404 ])
405 # now check the phrase with near explicitly 1 or 3 intervening items
406 self.assertSeqEqual(self.dex.find([
407 'ts:(hello <4> world) | (hello<2>world)']),
408 [('test', '3', 'foo'),
409 ('test', '4', 'foo'),
410 ])
411
412 # now check the phrase with near explicitly 3 intervening items
413 # with prefix for world.
414 self.assertSeqEqual(self.dex.find(['ts:hello <4> wor:*']),
415 [('test', '4', 'foo'),
416 ])
417
418 def test_invalid_language(self):
419 import psycopg2
420
421 from roundup.configuration import IndexerOption
422 IndexerOption.valid_langs.append("foo")
423 self.db.config["INDEXER_LANGUAGE"] = "foo"
424
425 with self.assertRaises(psycopg2.errors.UndefinedObject) as ctx:
426 # psycopg2.errors.UndefinedObject: text search configuration
427 # "foo" does not exist
428 self.dex.add_text(('test', '1', 'foo'), 'a the hello world')
429 self.assertIn('search configuration "foo" does', ctx.exception.args[0])
430 self.db.rollback()
431
432 with self.assertRaises(ValueError) as ctx:
433 self.dex.find(['"hello" "world"'])
434 self.assertIn('search configuration "foo" does', ctx.exception.args[0])
435 self.db.rollback()
436
437 self.db.config["INDEXER_LANGUAGE"] = "english"
239 438
240 @skip_mysql 439 @skip_mysql
241 class mysqlIndexerTest(mysqlOpener, RDBMSIndexerTest, IndexerTest): 440 class mysqlIndexerTest(mysqlOpener, RDBMSIndexerTest, IndexerTest):
242 def setUp(self): 441 def setUp(self):
243 mysqlOpener.setUp(self) 442 mysqlOpener.setUp(self)
252 451
253 class sqliteFtsIndexerTest(sqliteOpener, RDBMSIndexerTest, IndexerTest): 452 class sqliteFtsIndexerTest(sqliteOpener, RDBMSIndexerTest, IndexerTest):
254 def setUp(self): 453 def setUp(self):
255 RDBMSIndexerTest.setUp(self) 454 RDBMSIndexerTest.setUp(self)
256 from roundup.backends.indexer_sqlite_fts import Indexer 455 from roundup.backends.indexer_sqlite_fts import Indexer
257 self.dex = Indexer(db) 456 self.dex = Indexer(self.db)
258 self.dex.db = self.db 457 self.dex.db = self.db
259 458
260 def test_phrase_and_near(self): 459 def test_phrase_and_near(self):
261 self.dex.add_text(('test', '1', 'foo'), 'a the hello world') 460 self.dex.add_text(('test', '1', 'foo'), 'a the hello world')
262 self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') 461 self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world')

Roundup Issue Tracker: http://roundup-tracker.org/