Mercurial > p > roundup > code
view test/test_indexer.py @ 6814:3f60a71b0812
Summary: Support selecion session/otk data store. Add redis as data store.
Allow admin to select the backend data store. Compatibility matrix:
main\/ session>| anydbm | sqlite | redis | mysql | postgresql |
anydbm | D | | X | | |
sqlite | X | D | X | | |
mysql | | | | D | |
postgresql | | | | | D |
--------------------------------------------------------------+
D - default if unconfigured, X - compatible choice
DETAILS
roundup/configuration.py:
add config.ini section sessiondb with settings: backend and redis_url.
CHANGES.txt, doc/admin_guide.txt, doc/installation.txt, doc/upgrading.txt:
doc on config of session db and redis. Plus some other fixes:
admin - clarified why we do not drop __words and __testids
table in native-fts conversion. TYpo fix.
upgrading - doc how you can keep using anydbm for session data with
sqlite. Fix dupe sentence in an upgrading config.ini
section.
roundup/backends/back_anydbm.py, roundup/backends/back_sqlite.py:
code to support redis, redis/anydbm backends respectively.
roundup/backends/sessions_redis.py
new storage backend for redis.
roundup/rest.py, roundup/cgi/actions.py, roundup/cgi/templating.py
redis uses a different way of calculating lifetime/timestamp.
Since expiration of an item occurred if its timestamp was more
than 1 week old, code would calculate:
now - 1 week + lifetime.
But this results in faster expiration in redis if used for
lifetime/timestamp.
Convert code to use the lifetime() method in BasicDatabase
that generates the right timestamp for each backend.
test/session_common.py:
added tests for more cases, get without default, getall non-existing
key etc. timestamp test changed to use new self.get_ts which is
overridden in other tests. Test that datatypes survive storage.
test/test_redis_session.py:
test redis session store with sqlite and anydbm primary databases
test/test_anydbm.py, test/test_sqlite.py
add test to make sure the databases are properly set up
sqlite - add test cases where anydbm is used as datastore
anydbm - remove updateTimestamp override add get_ts().
test/test_config.py
tests on redis_url and compatibility on choice of sessiondb backend
.travis.yml:
add redis db and redis-py
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Thu, 04 Aug 2022 14:41:58 -0400 |
| parents | 0d99ae7c8de6 |
| children | 3260926d7e7e |
line wrap: on
line source
# Copyright (c) 2002 ekit.com Inc (http://www.ekit-inc.com/) # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. import os, unittest, shutil import pytest from roundup.backends import get_backend, have_backend from roundup.backends.indexer_rdbms import Indexer from roundup.cgi.exceptions import IndexerQueryError # borrow from other tests from .db_test_base import setupSchema, config from .test_postgresql import postgresqlOpener, skip_postgresql from .test_mysql import mysqlOpener, skip_mysql from .test_sqlite import sqliteOpener try: import xapian skip_xapian = lambda func, *args, **kwargs: func except ImportError: # FIX: workaround for a bug in pytest.mark.skip(): # https://github.com/pytest-dev/pytest/issues/568 from .pytest_patcher import mark_class skip_xapian = mark_class(pytest.mark.skip( "Skipping Xapian indexer tests: 'xapian' not installed")) try: import whoosh skip_whoosh = lambda func, *args, **kwargs: func except ImportError: # FIX: workaround for a bug in pytest.mark.skip(): # https://github.com/pytest-dev/pytest/issues/568 from .pytest_patcher import mark_class skip_whoosh = mark_class(pytest.mark.skip( "Skipping Whoosh indexer tests: 'whoosh' not installed")) class db: class config(dict): DATABASE = 'test-index' config = config() config[('main', 'indexer_stopwords')] = [] config[('main', 'indexer_language')] = "english" class IndexerTest(unittest.TestCase): def setUp(self): if os.path.exists('test-index'): shutil.rmtree('test-index') os.mkdir('test-index') os.mkdir('test-index/files') from roundup.backends.indexer_dbm import Indexer self.dex = Indexer(db) self.dex.load_index() def assertSeqEqual(self, s1, s2): # First argument is the db result we're testing, second is the # desired result. Some db results don't have iterable rows, so we # have to work around that. # Also work around some dbs not returning items in the expected # order. s1 = list([tuple([r[n] for n in range(len(r))]) for r in s1]) s1.sort() if s1 != s2: self.fail('contents of %r != %r'%(s1, s2)) def test_basics(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'blah blah the world') self.assertSeqEqual(self.dex.find(['world']), [('test', '1', 'foo'), ('test', '2', 'foo')]) self.assertSeqEqual(self.dex.find(['blah']), [('test', '2', 'foo')]) self.assertSeqEqual(self.dex.find(['blah', 'hello']), []) def test_change(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'blah blah the world') self.assertSeqEqual(self.dex.find(['world']), [('test', '1', 'foo'), ('test', '2', 'foo')]) self.dex.add_text(('test', '1', 'foo'), 'a the hello') self.assertSeqEqual(self.dex.find(['world']), [('test', '2', 'foo')]) def test_clear(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'blah blah the world') self.assertSeqEqual(self.dex.find(['world']), [('test', '1', 'foo'), ('test', '2', 'foo')]) self.dex.add_text(('test', '1', 'foo'), '') self.assertSeqEqual(self.dex.find(['world']), [('test', '2', 'foo')]) def test_stopwords(self): """Test that we can find a text with a stopword in it.""" stopword = "with" self.assertTrue(self.dex.is_stopword(stopword.upper())) self.dex.add_text(('test', '1', 'bar'), '%s hello world' % stopword) self.dex.add_text(('test', '2', 'bar'), 'blah a %s world' % stopword) self.dex.add_text(('test', '3', 'bar'), 'blah Blub river') self.dex.add_text(('test', '4', 'bar'), 'blah river %s' % stopword) self.assertSeqEqual(self.dex.find(['with','world']), [('test', '1', 'bar'), ('test', '2', 'bar')]) def test_extremewords(self): """Testing too short or too long words.""" # skip this for FTS test if ( isinstance(self,sqliteFtsIndexerTest) or isinstance(self,postgresqlFtsIndexerTest)): pytest.skip("extremewords not tested for native FTS backends") short = "b" long = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz" self.dex.add_text(('test', '1', 'a'), '%s hello world' % short) self.dex.add_text(('test', '2', 'a'), 'blah a %s world' % short) self.dex.add_text(('test', '3', 'a'), 'blah Blub river') self.dex.add_text(('test', '4', 'a'), 'blah river %s %s' % (short, long)) self.assertSeqEqual(self.dex.find([short,'world', long, short]), [('test', '1', 'a'), ('test', '2', 'a')]) self.assertSeqEqual(self.dex.find([long]),[]) # special test because some faulty code indexed length(word)>=2 # but only considered length(word)>=3 to be significant self.dex.add_text(('test', '5', 'a'), 'blah py %s %s' % (short, long)) self.assertSeqEqual(self.dex.find(["py"]), [('test', '5', 'a')]) def test_casesensitivity(self): """Test if searches are case-in-sensitive.""" self.dex.add_text(('test', '1', 'a'), 'aaaa bbbb') self.dex.add_text(('test', '2', 'a'), 'aAaa BBBB') self.assertSeqEqual(self.dex.find(['aaaa']), [('test', '1', 'a'), ('test', '2', 'a')]) self.assertSeqEqual(self.dex.find(['BBBB']), [('test', '1', 'a'), ('test', '2', 'a')]) def test_wordsplitting(self): """Test if word splitting works.""" self.dex.add_text(('test', '1', 'a'), 'aaaa-aaa bbbb*bbb') self.dex.add_text(('test', '2', 'a'), 'aaaA-aaa BBBB*BBB') for k in 'aaaa', 'aaa', 'bbbb', 'bbb': self.assertSeqEqual(self.dex.find([k]), [('test', '1', 'a'), ('test', '2', 'a')]) def test_manyresults(self): """Test if searches find many results.""" for i in range(123): self.dex.add_text(('test', str(i), 'many'), 'many') self.assertEqual(len(self.dex.find(['many'])), 123) def test_unicode(self): """Test with unicode words. see: https://issues.roundup-tracker.org/issue1344046""" russian=u'\u0440\u0443\u0441\u0441\u043a\u0438\u0439 \u0442\u0435\u043a\u0441\u0442Spr\xfcnge' german=u'Spr\xfcnge' self.dex.add_text(('test', '1', 'a'), german ) self.dex.add_text(('test', '2', 'a'), russian + u' ' + german ) self.assertSeqEqual(self.dex.find([ u'Spr\xfcnge']), [('test', '1', 'a'), ('test', '2', 'a')]) self.assertSeqEqual(self.dex.find([u'\u0440\u0443\u0441\u0441\u043a\u0438\u0439']), [('test', '2', 'a')]) def tearDown(self): shutil.rmtree('test-index') @skip_whoosh class WhooshIndexerTest(IndexerTest): def setUp(self): if os.path.exists('test-index'): shutil.rmtree('test-index') os.mkdir('test-index') from roundup.backends.indexer_whoosh import Indexer self.dex = Indexer(db) def tearDown(self): shutil.rmtree('test-index') @skip_xapian class XapianIndexerTest(IndexerTest): def setUp(self): if os.path.exists('test-index'): shutil.rmtree('test-index') os.mkdir('test-index') from roundup.backends.indexer_xapian import Indexer self.dex = Indexer(db) def tearDown(self): shutil.rmtree('test-index') class RDBMSIndexerTest(object): def setUp(self): # remove previous test, ignore errors if os.path.exists(config.DATABASE): shutil.rmtree(config.DATABASE) self.db = self.module.Database(config, 'admin') self.dex = Indexer(self.db) def tearDown(self): if hasattr(self, 'db'): self.db.close() if os.path.exists(config.DATABASE): shutil.rmtree(config.DATABASE) @skip_postgresql class postgresqlIndexerTest(postgresqlOpener, RDBMSIndexerTest, IndexerTest): def setUp(self): postgresqlOpener.setUp(self) RDBMSIndexerTest.setUp(self) def tearDown(self): RDBMSIndexerTest.tearDown(self) postgresqlOpener.tearDown(self) @skip_postgresql class postgresqlFtsIndexerTest(postgresqlOpener, RDBMSIndexerTest, IndexerTest): def setUp(self): postgresqlOpener.setUp(self) RDBMSIndexerTest.setUp(self) from roundup.backends.indexer_postgresql_fts import Indexer self.dex = Indexer(self.db) self.dex.db = self.db def tearDown(self): RDBMSIndexerTest.tearDown(self) postgresqlOpener.tearDown(self) def test_websearch_syntax(self): """Test searches using websearch_to_tsquery. These never throw errors regardless of how wacky the input. """ self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') self.dex.add_text(('test', '3', 'foo'), 'blah hello the world') self.dex.add_text(('test', '4', 'foo'), 'hello blah blech the world') self.dex.add_text(('test', '5', 'foo'), 'a car drove') self.dex.add_text(('test', '6', 'foo'), 'a car driving itself') self.dex.add_text(('test', '7', 'foo'), "let's drive in the car") self.dex.add_text(('test', '8', 'foo'), 'a drive-in movie') # test two separate words for sanity self.assertSeqEqual(self.dex.find(['"hello" "world"']), [('test', '1', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo') ]) # now check the phrase self.assertSeqEqual(self.dex.find(['"hello world"']), [('test', '1', 'foo'), ]) # test negation self.assertSeqEqual(self.dex.find(['hello world -blech']), [('test', '1', 'foo'), ('test', '3', 'foo'), ]) # phrase negation self.assertSeqEqual(self.dex.find(['hello world -"blah hello"']), [('test', '1', 'foo'), ('test', '4', 'foo'), ]) # test without or self.assertSeqEqual(self.dex.find(['blah blech']), [('test', '4', 'foo'), ]) # test with or self.assertSeqEqual(self.dex.find(['blah or blech']), [ ('test', '2', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo'), ]) # stemmer test for english self.assertSeqEqual(self.dex.find(['ts:drive']), [('test', '6', 'foo'), ('test', '7', 'foo'), ('test', '8', 'foo') ]) # stemmer is not disabled by quotes 8-( self.assertSeqEqual(self.dex.find(['ts:"drive"']), [('test', '6', 'foo'), ('test', '7', 'foo'), ('test', '8', 'foo') ]) # this is missing ts: at the start, so uses the websearch # parser. We search for operator characters and wanr the user # Otherwise "hello <-> world" is the same as "hello world" # and is not a phrase search. with self.assertRaises(IndexerQueryError) as ctx: self.dex.find(['hello <-> world']) self.assertIn('do a tsquery search', ctx.exception.args[0]) def test_tsquery_syntax(self): """Because websearch_to_tsquery doesn't allow prefix searches, near searches with any value except 1 (phrase search), allow use of to_tsquery by prefixing the search term wih ts:. However, unlike websearch_to_tsquery, this will throw a psycopg2.errors.SyntaxError on bad input. SyntaxError is re-raised as IndexerQueryError. But it makes a bunch of useful expert functionality available. """ self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') self.dex.add_text(('test', '3', 'foo'), 'blah hello the world') self.dex.add_text(('test', '4', 'foo'), 'hello blah blech the world') self.dex.add_text(('test', '5', 'foo'), 'a car drove') self.dex.add_text(('test', '6', 'foo'), 'a car driving itself') self.dex.add_text(('test', '7', 'foo'), "let's drive in the car") self.dex.add_text(('test', '8', 'foo'), 'a drive-in movie') self.dex.db.commit() # test two separate words for sanity self.assertSeqEqual(self.dex.find(['ts:hello & world']), [('test', '1', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo') ]) # now check the phrase self.assertSeqEqual(self.dex.find(['ts:hello <-> world']), [('test', '1', 'foo'), ]) # test negation self.assertSeqEqual(self.dex.find(['ts:hello & world & !blech']), [('test', '1', 'foo'), ('test', '3', 'foo'), ]) self.assertSeqEqual(self.dex.find( ['ts:hello & world & !(blah <-> hello)']), [('test', '1', 'foo'), ('test', '4', 'foo'), ]) # test without or self.assertSeqEqual(self.dex.find(['ts:blah & blech']), [('test', '4', 'foo'), ]) # test with or self.assertSeqEqual(self.dex.find(['ts:blah | blech']), [ ('test', '2', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo'), ]) # stemmer test for english self.assertSeqEqual(self.dex.find(['ts:drive']), [('test', '6', 'foo'), ('test', '7', 'foo'), ('test', '8', 'foo') ]) # stemmer is not disabled by quotes 8-( self.assertSeqEqual(self.dex.find(['ts:"drive"']), [('test', '6', 'foo'), ('test', '7', 'foo'), ('test', '8', 'foo') ]) # test with syntax error with self.assertRaises(IndexerQueryError) as ctx: self.dex.find(['ts:blah blech']) self.assertEqual(ctx.exception.args[0], 'syntax error in tsquery: "blah blech"\n') # now check the phrase Note unlike sqlite, order matters, # hello must come first. self.assertSeqEqual(self.dex.find(['ts:hello <-> world']), [('test', '1', 'foo'), ]) # now check the phrase with explicitly 1 intervening item self.assertSeqEqual(self.dex.find(['ts:hello <2> world']), [('test', '3', 'foo'), ]) # now check the phrase with near explicitly 1 or 3 intervening items self.assertSeqEqual(self.dex.find([ 'ts:(hello <4> world) | (hello<2>world)']), [('test', '3', 'foo'), ('test', '4', 'foo'), ]) # now check the phrase with near explicitly 3 intervening item # with prefix for world. self.assertSeqEqual(self.dex.find(['ts:hello <4> wor:*']), [('test', '4', 'foo'), ]) def test_invalid_language(self): import psycopg2 from roundup.configuration import IndexerOption IndexerOption.valid_langs.append("foo") self.db.config["INDEXER_LANGUAGE"] = "foo" with self.assertRaises(psycopg2.errors.UndefinedObject) as ctx: # psycopg2.errors.UndefinedObject: text search configuration # "foo" does not exist self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.assertIn('search configuration "foo" does', ctx.exception.args[0]) self.db.rollback() with self.assertRaises(ValueError) as ctx: self.dex.find(['"hello" "world"']) self.assertIn('search configuration "foo" does', ctx.exception.args[0]) self.db.rollback() self.db.config["INDEXER_LANGUAGE"] = "english" @skip_mysql class mysqlIndexerTest(mysqlOpener, RDBMSIndexerTest, IndexerTest): def setUp(self): mysqlOpener.setUp(self) RDBMSIndexerTest.setUp(self) def tearDown(self): RDBMSIndexerTest.tearDown(self) mysqlOpener.tearDown(self) class sqliteIndexerTest(sqliteOpener, RDBMSIndexerTest, IndexerTest): pass class sqliteFtsIndexerTest(sqliteOpener, RDBMSIndexerTest, IndexerTest): def setUp(self): RDBMSIndexerTest.setUp(self) from roundup.backends.indexer_sqlite_fts import Indexer self.dex = Indexer(self.db) self.dex.db = self.db def test_phrase_and_near(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') self.dex.add_text(('test', '3', 'foo'), 'blah hello the world') self.dex.add_text(('test', '4', 'foo'), 'hello blah blech the world') # test two separate words for sanity self.assertSeqEqual(self.dex.find(['"hello" "world"']), [('test', '1', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo') ]) # now check the phrase self.assertSeqEqual(self.dex.find(['"hello world"']), [('test', '1', 'foo'), ]) # now check the phrase with near explicitly 0 intervening items self.assertSeqEqual(self.dex.find(['NEAR(hello world, 0)']), [('test', '1', 'foo'), ]) # now check the phrase with near explicitly 1 intervening item self.assertSeqEqual(self.dex.find(['NEAR(hello world, 1)']), [('test', '1', 'foo'), ('test', '3', 'foo'), ]) # now check the phrase with near explicitly 3 intervening item self.assertSeqEqual(self.dex.find(['NEAR(hello world, 3)']), [('test', '1', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo'), ]) def test_prefix(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') self.dex.add_text(('test', '3', 'foo'), 'blah hello the world') self.dex.add_text(('test', '4', 'foo'), 'hello blah blech the world') self.assertSeqEqual(self.dex.find(['hel*']), [('test', '1', 'foo'), ('test', '2', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo') ]) def test_bool_start(self): self.dex.add_text(('test', '1', 'foo'), 'a the hello world') self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') self.dex.add_text(('test', '3', 'foo'), 'blah hello the world') self.dex.add_text(('test', '4', 'foo'), 'hello blah blech the world') self.assertSeqEqual(self.dex.find(['hel* NOT helh NOT blech']), [('test', '1', 'foo'), ('test', '3', 'foo'), ]) self.assertSeqEqual(self.dex.find(['hel* NOT helh NOT blech OR the']), [('test', '1', 'foo'), ('test', '2', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo'), ]) self.assertSeqEqual(self.dex.find(['helh OR hello']), [('test', '1', 'foo'), ('test', '2', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo'), ]) self.assertSeqEqual(self.dex.find(['helh AND hello']), []) # matches if line starts with hello self.assertSeqEqual(self.dex.find(['^hello']), [ ('test', '4', 'foo'), ]) self.assertSeqEqual(self.dex.find(['hello']), [ ('test', '1', 'foo'), ('test', '3', 'foo'), ('test', '4', 'foo'), ]) def test_query_errors(self): """test query phrases that generate an error. Also test the correction""" self.dex.add_text(('test', '1', 'foo'), 'a the hello-world') self.dex.add_text(('test', '2', 'foo'), 'helh blah blah the world') self.dex.add_text(('test', '3', 'foo'), 'blah hello the world') self.dex.add_text(('test', '4', 'foo'), 'hello blah blech the world') # handle known error that roundup recognizes and tries to diagnose with self.assertRaises(IndexerQueryError) as ctx: self.dex.find(['the hello-world']) error = ( "Search failed. Try quoting any terms that include a '-' " "and retry the search.") self.assertEqual(str(ctx.exception), error) self.assertSeqEqual(self.dex.find(['the "hello-world"']), [('test', '1', 'foo'), ]) # handle known error that roundup recognizes and tries to diagnose with self.assertRaises(IndexerQueryError) as ctx: self.dex.find(['hello world + ^the']) error = 'Query error: syntax error near "^"' self.assertEqual(str(ctx.exception), error) # vim: set filetype=python ts=4 sw=4 et si
