Mercurial > p > roundup > code
comparison test/test_indexer.py @ 6604:0d99ae7c8de6
Allow Roundup to use PostgreSQL database native full text search
back_postgresql.py - schema version changes for schema version 7.
configuration.py - added indexer_language checks for postgresql. Hardcoded
list for now.
Docs admin_guide and upgrading
Tests.
This also restructures the version upgrade tests for the rdbms
backends. They can run all of them now as the proper cascade is
developed to roll back changes to version 6.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Thu, 27 Jan 2022 19:48:48 -0500 |
| parents | 39189dd94f2c |
| children | 3260926d7e7e |
comparison
equal
deleted
inserted
replaced
| 6603:57dc15ad648d | 6604:0d99ae7c8de6 |
|---|---|
| 117 ('test', '2', 'bar')]) | 117 ('test', '2', 'bar')]) |
| 118 def test_extremewords(self): | 118 def test_extremewords(self): |
| 119 """Testing too short or too long words.""" | 119 """Testing too short or too long words.""" |
| 120 | 120 |
| 121 # skip this for FTS test | 121 # skip this for FTS test |
| 122 if isinstance(self,sqliteFtsIndexerTest): | 122 if ( isinstance(self,sqliteFtsIndexerTest) or |
| 123 isinstance(self,postgresqlFtsIndexerTest)): | |
| 123 pytest.skip("extremewords not tested for native FTS backends") | 124 pytest.skip("extremewords not tested for native FTS backends") |
| 124 | 125 |
| 125 short = "b" | 126 short = "b" |
| 126 long = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" | 127 long = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" |
| 127 self.dex.add_text(('test', '1', 'a'), '%s hello world' % short) | 128 self.dex.add_text(('test', '1', 'a'), '%s hello world' % short) |
| 138 # but only considered length(word)>=3 to be significant | 139 # but only considered length(word)>=3 to be significant |
| 139 self.dex.add_text(('test', '5', 'a'), 'blah py %s %s' | 140 self.dex.add_text(('test', '5', 'a'), 'blah py %s %s' |
| 140 % (short, long)) | 141 % (short, long)) |
| 141 self.assertSeqEqual(self.dex.find(["py"]), [('test', '5', 'a')]) | 142 self.assertSeqEqual(self.dex.find(["py"]), [('test', '5', 'a')]) |
| 142 | 143 |
| 143 def test_casesensitity(self): | 144 def test_casesensitivity(self): |
| 144 """Test if searches are case-in-sensitive.""" | 145 """Test if searches are case-in-sensitive.""" |
| 145 self.dex.add_text(('test', '1', 'a'), 'aaaa bbbb') | 146 self.dex.add_text(('test', '1', 'a'), 'aaaa bbbb') |
| 146 self.dex.add_text(('test', '2', 'a'), 'aAaa BBBB') | 147 self.dex.add_text(('test', '2', 'a'), 'aAaa BBBB') |
| 147 self.assertSeqEqual(self.dex.find(['aaaa']), | 148 self.assertSeqEqual(self.dex.find(['aaaa']), |
| 148 [('test', '1', 'a'), | 149 [('test', '1', 'a'), |
| 224 RDBMSIndexerTest.setUp(self) | 225 RDBMSIndexerTest.setUp(self) |
| 225 def tearDown(self): | 226 def tearDown(self): |
| 226 RDBMSIndexerTest.tearDown(self) | 227 RDBMSIndexerTest.tearDown(self) |
| 227 postgresqlOpener.tearDown(self) | 228 postgresqlOpener.tearDown(self) |
| 228 | 229 |
| 229 """ | 230 |
| 230 @skip_postgresql | 231 @skip_postgresql |
| 231 class postgresqlFtsIndexerTest(postgresqlOpener, RDBMSIndexerTest, IndexerTest): | 232 class postgresqlFtsIndexerTest(postgresqlOpener, RDBMSIndexerTest, IndexerTest): |
| 232 def setUp(self): | 233 def setUp(self): |
| 233 postgresqlOpener.setUp(self) | 234 postgresqlOpener.setUp(self) |
| 234 RDBMSIndexerTest.setUp(self) | 235 RDBMSIndexerTest.setUp(self) |
| 236 from roundup.backends.indexer_postgresql_fts import Indexer | |
| 237 self.dex = Indexer(self.db) | |
| 238 self.dex.db = self.db | |
| 239 | |
| 235 def tearDown(self): | 240 def tearDown(self): |
| 236 RDBMSIndexerTest.tearDown(self) | 241 RDBMSIndexerTest.tearDown(self) |
| 237 postgresqlOpener.tearDown(self) | 242 postgresqlOpener.tearDown(self) |
| 238 """ | 243 |
| 244 def test_websearch_syntax(self): | |
| 245 """Test searches using websearch_to_tsquery. These never throw | |
| 246 errors regardless of how wacky the input. | |
| 247 """ | |
| 248 | |
| 249 self.dex.add_text(('test', '1', 'foo'), 'a the hello world') | |
| 250 self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') | |
| 251 self.dex.add_text(('test', '3', 'foo'), 'blah hello the world') | |
| 252 self.dex.add_text(('test', '4', 'foo'), 'hello blah blech the world') | |
| 253 self.dex.add_text(('test', '5', 'foo'), 'a car drove') | |
| 254 self.dex.add_text(('test', '6', 'foo'), 'a car driving itself') | |
| 255 self.dex.add_text(('test', '7', 'foo'), "let's drive in the car") | |
| 256 self.dex.add_text(('test', '8', 'foo'), 'a drive-in movie') | |
| 257 | |
| 258 # test two separate words for sanity | |
| 259 self.assertSeqEqual(self.dex.find(['"hello" "world"']), | |
| 260 [('test', '1', 'foo'), | |
| 261 ('test', '3', 'foo'), | |
| 262 ('test', '4', 'foo') | |
| 263 ]) | |
| 264 # now check the phrase | |
| 265 self.assertSeqEqual(self.dex.find(['"hello world"']), | |
| 266 [('test', '1', 'foo'), | |
| 267 ]) | |
| 268 | |
| 269 # test negation | |
| 270 self.assertSeqEqual(self.dex.find(['hello world -blech']), | |
| 271 [('test', '1', 'foo'), | |
| 272 ('test', '3', 'foo'), | |
| 273 ]) | |
| 274 | |
| 275 # phrase negation | |
| 276 self.assertSeqEqual(self.dex.find(['hello world -"blah hello"']), | |
| 277 [('test', '1', 'foo'), | |
| 278 ('test', '4', 'foo'), | |
| 279 ]) | |
| 280 | |
| 281 # test without or | |
| 282 self.assertSeqEqual(self.dex.find(['blah blech']), | |
| 283 [('test', '4', 'foo'), | |
| 284 ]) | |
| 285 | |
| 286 # test with or | |
| 287 self.assertSeqEqual(self.dex.find(['blah or blech']), | |
| 288 [ ('test', '2', 'foo'), | |
| 289 ('test', '3', 'foo'), | |
| 290 ('test', '4', 'foo'), | |
| 291 ]) | |
| 292 | |
| 293 # stemmer test for english | |
| 294 self.assertSeqEqual(self.dex.find(['ts:drive']), | |
| 295 [('test', '6', 'foo'), | |
| 296 ('test', '7', 'foo'), | |
| 297 ('test', '8', 'foo') | |
| 298 ]) | |
| 299 | |
| 300 # stemmer is not disabled by quotes 8-( | |
| 301 self.assertSeqEqual(self.dex.find(['ts:"drive"']), | |
| 302 [('test', '6', 'foo'), | |
| 303 ('test', '7', 'foo'), | |
| 304 ('test', '8', 'foo') | |
| 305 ]) | |
| 306 | |
| 307 | |
| 308 # this is missing ts: at the start, so uses the websearch | |
| 309 # parser. We search for operator characters and warn the user. | |
| 310 # Otherwise "hello <-> world" is the same as "hello world" | |
| 311 # and is not a phrase search. | |
| 312 with self.assertRaises(IndexerQueryError) as ctx: | |
| 313 self.dex.find(['hello <-> world']) | |
| 314 | |
| 315 self.assertIn('do a tsquery search', ctx.exception.args[0]) | |
| 316 | |
| 317 def test_tsquery_syntax(self): | |
| 318 """Because websearch_to_tsquery doesn't allow prefix searches | |
| 319 or near searches with any value except 1 (phrase search), allow | |
| 320 use of to_tsquery by prefixing the search term with ts:. | |
| 321 | |
| 322 However, unlike websearch_to_tsquery, this will throw a | |
| 323 psycopg2.errors.SyntaxError on bad input. SyntaxError is | |
| 324 re-raised as IndexerQueryError. But it makes a bunch of | |
| 325 useful expert functionality available. | |
| 326 | |
| 327 """ | |
| 328 | |
| 329 self.dex.add_text(('test', '1', 'foo'), 'a the hello world') | |
| 330 self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') | |
| 331 self.dex.add_text(('test', '3', 'foo'), 'blah hello the world') | |
| 332 self.dex.add_text(('test', '4', 'foo'), 'hello blah blech the world') | |
| 333 self.dex.add_text(('test', '5', 'foo'), 'a car drove') | |
| 334 self.dex.add_text(('test', '6', 'foo'), 'a car driving itself') | |
| 335 self.dex.add_text(('test', '7', 'foo'), "let's drive in the car") | |
| 336 self.dex.add_text(('test', '8', 'foo'), 'a drive-in movie') | |
| 337 self.dex.db.commit() | |
| 338 | |
| 339 # test two separate words for sanity | |
| 340 self.assertSeqEqual(self.dex.find(['ts:hello & world']), | |
| 341 [('test', '1', 'foo'), | |
| 342 ('test', '3', 'foo'), | |
| 343 ('test', '4', 'foo') | |
| 344 ]) | |
| 345 # now check the phrase | |
| 346 self.assertSeqEqual(self.dex.find(['ts:hello <-> world']), | |
| 347 [('test', '1', 'foo'), | |
| 348 ]) | |
| 349 | |
| 350 # test negation | |
| 351 self.assertSeqEqual(self.dex.find(['ts:hello & world & !blech']), | |
| 352 [('test', '1', 'foo'), | |
| 353 ('test', '3', 'foo'), | |
| 354 ]) | |
| 355 | |
| 356 self.assertSeqEqual(self.dex.find( | |
| 357 ['ts:hello & world & !(blah <-> hello)']), | |
| 358 [('test', '1', 'foo'), | |
| 359 ('test', '4', 'foo'), | |
| 360 ]) | |
| 361 | |
| 362 # test without or | |
| 363 self.assertSeqEqual(self.dex.find(['ts:blah & blech']), | |
| 364 [('test', '4', 'foo'), | |
| 365 ]) | |
| 366 | |
| 367 # test with or | |
| 368 self.assertSeqEqual(self.dex.find(['ts:blah | blech']), | |
| 369 [ ('test', '2', 'foo'), | |
| 370 ('test', '3', 'foo'), | |
| 371 ('test', '4', 'foo'), | |
| 372 ]) | |
| 373 # stemmer test for english | |
| 374 self.assertSeqEqual(self.dex.find(['ts:drive']), | |
| 375 [('test', '6', 'foo'), | |
| 376 ('test', '7', 'foo'), | |
| 377 ('test', '8', 'foo') | |
| 378 ]) | |
| 379 | |
| 380 # stemmer is not disabled by quotes 8-( | |
| 381 self.assertSeqEqual(self.dex.find(['ts:"drive"']), | |
| 382 [('test', '6', 'foo'), | |
| 383 ('test', '7', 'foo'), | |
| 384 ('test', '8', 'foo') | |
| 385 ]) | |
| 386 | |
| 387 | |
| 388 # test with syntax error | |
| 389 with self.assertRaises(IndexerQueryError) as ctx: | |
| 390 self.dex.find(['ts:blah blech']) | |
| 391 | |
| 392 self.assertEqual(ctx.exception.args[0], | |
| 393 'syntax error in tsquery: "blah blech"\n') | |
| 394 | |
| 395 # now check the phrase. Note: unlike sqlite, order matters, | |
| 396 # hello must come first. | |
| 397 self.assertSeqEqual(self.dex.find(['ts:hello <-> world']), | |
| 398 [('test', '1', 'foo'), | |
| 399 ]) | |
| 400 | |
| 401 # now check the phrase with explicitly 1 intervening item | |
| 402 self.assertSeqEqual(self.dex.find(['ts:hello <2> world']), | |
| 403 [('test', '3', 'foo'), | |
| 404 ]) | |
| 405 # now check the phrase with near explicitly 1 or 3 intervening items | |
| 406 self.assertSeqEqual(self.dex.find([ | |
| 407 'ts:(hello <4> world) | (hello<2>world)']), | |
| 408 [('test', '3', 'foo'), | |
| 409 ('test', '4', 'foo'), | |
| 410 ]) | |
| 411 | |
| 412 # now check the phrase with near explicitly 3 intervening items | |
| 413 # with prefix for world. | |
| 414 self.assertSeqEqual(self.dex.find(['ts:hello <4> wor:*']), | |
| 415 [('test', '4', 'foo'), | |
| 416 ]) | |
| 417 | |
| 418 def test_invalid_language(self): | |
| 419 import psycopg2 | |
| 420 | |
| 421 from roundup.configuration import IndexerOption | |
| 422 IndexerOption.valid_langs.append("foo") | |
| 423 self.db.config["INDEXER_LANGUAGE"] = "foo" | |
| 424 | |
| 425 with self.assertRaises(psycopg2.errors.UndefinedObject) as ctx: | |
| 426 # psycopg2.errors.UndefinedObject: text search configuration | |
| 427 # "foo" does not exist | |
| 428 self.dex.add_text(('test', '1', 'foo'), 'a the hello world') | |
| 429 self.assertIn('search configuration "foo" does', ctx.exception.args[0]) | |
| 430 self.db.rollback() | |
| 431 | |
| 432 with self.assertRaises(ValueError) as ctx: | |
| 433 self.dex.find(['"hello" "world"']) | |
| 434 self.assertIn('search configuration "foo" does', ctx.exception.args[0]) | |
| 435 self.db.rollback() | |
| 436 | |
| 437 self.db.config["INDEXER_LANGUAGE"] = "english" | |
| 239 | 438 |
| 240 @skip_mysql | 439 @skip_mysql |
| 241 class mysqlIndexerTest(mysqlOpener, RDBMSIndexerTest, IndexerTest): | 440 class mysqlIndexerTest(mysqlOpener, RDBMSIndexerTest, IndexerTest): |
| 242 def setUp(self): | 441 def setUp(self): |
| 243 mysqlOpener.setUp(self) | 442 mysqlOpener.setUp(self) |
| 252 | 451 |
| 253 class sqliteFtsIndexerTest(sqliteOpener, RDBMSIndexerTest, IndexerTest): | 452 class sqliteFtsIndexerTest(sqliteOpener, RDBMSIndexerTest, IndexerTest): |
| 254 def setUp(self): | 453 def setUp(self): |
| 255 RDBMSIndexerTest.setUp(self) | 454 RDBMSIndexerTest.setUp(self) |
| 256 from roundup.backends.indexer_sqlite_fts import Indexer | 455 from roundup.backends.indexer_sqlite_fts import Indexer |
| 257 self.dex = Indexer(db) | 456 self.dex = Indexer(self.db) |
| 258 self.dex.db = self.db | 457 self.dex.db = self.db |
| 259 | 458 |
| 260 def test_phrase_and_near(self): | 459 def test_phrase_and_near(self): |
| 261 self.dex.add_text(('test', '1', 'foo'), 'a the hello world') | 460 self.dex.add_text(('test', '1', 'foo'), 'a the hello world') |
| 262 self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') | 461 self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') |
