changeset 6593:e70e2789bc2c

issue2551189 - increase text search maxlength

This removes, I think, all the magic references to 25 and 30 (the varchar size) and replaces them with references to maxlength or maxlength+5. I am not sure why the db column is 5 characters larger than what should be the maximum size of a word, but I'll keep the buffer of 5, as making it 1/5 the size of maxlength makes less sense.

Also added tests for fts search in templating, which were missing. Added postgres, mysql and sqlite native indexing backends in which to test fts. Added the fts test to native-fts as well to make sure it's working.

I want to commit this now for CI.

Todo:
- Add test cases for the use of FTS in the csv output in actions.py. There is no test coverage of the match case there.
- Change maxlength to a higher value (50) as requested in the ticket. Modify the existing extremewords test cases to allow words > 25 and < 51.
- Write code to migrate column sizes for mysql and postgresql to match maxlength. I will roll this into the version 7 schema update that supports use of database fts support.
author John Rouillard <rouilj@ieee.org>
date Tue, 25 Jan 2022 13:22:00 -0500
parents 828e2eaee7cd
children f3a0cb617ea8
files roundup/backends/back_mysql.py roundup/backends/back_postgresql.py roundup/cgi/actions.py roundup/cgi/templating.py test/test_cgi.py
diffstat 5 files changed, 183 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/roundup/backends/back_mysql.py	Mon Jan 24 23:23:27 2022 -0500
+++ b/roundup/backends/back_mysql.py	Tue Jan 25 13:22:00 2022 -0500
@@ -234,8 +234,10 @@
         self.sql('''CREATE TABLE __textids (_class VARCHAR(255),
             _itemid VARCHAR(255), _prop VARCHAR(255), _textid INT)
             ENGINE=%s'''%self.mysql_backend)
-        self.sql('''CREATE TABLE __words (_word VARCHAR(30),
-            _textid INT) ENGINE=%s'''%self.mysql_backend)
+        self.sql('''CREATE TABLE __words (_word VARCHAR(%s),
+            _textid INT) ENGINE=%s''' % ((self.indexer.maxlength + 5),
+                                         self.mysql_backend)
+            )
         self.sql('CREATE INDEX words_word_ids ON __words(_word)')
         self.sql('CREATE INDEX words_by_id ON __words (_textid)')
         self.sql('CREATE UNIQUE INDEX __textids_by_props ON '
--- a/roundup/backends/back_postgresql.py	Mon Jan 24 23:23:27 2022 -0500
+++ b/roundup/backends/back_postgresql.py	Tue Jan 25 13:22:00 2022 -0500
@@ -235,8 +235,8 @@
         self.sql('''CREATE TABLE __textids (
             _textid integer primary key, _class VARCHAR(255),
             _itemid VARCHAR(255), _prop VARCHAR(255))''')
-        self.sql('''CREATE TABLE __words (_word VARCHAR(30),
-            _textid integer)''')
+        self.sql('''CREATE TABLE __words (_word VARCHAR(%s),
+            _textid integer)''' % (self.indexer.maxlength + 5))
         self.sql('CREATE INDEX words_word_idx ON __words(_word)')
         self.sql('CREATE INDEX words_by_id ON __words (_textid)')
         self.sql('CREATE UNIQUE INDEX __textids_by_props ON '
--- a/roundup/cgi/actions.py	Mon Jan 24 23:23:27 2022 -0500
+++ b/roundup/cgi/actions.py	Tue Jan 25 13:22:00 2022 -0500
@@ -1433,9 +1433,10 @@
 
         # full-text search
         if request.search_text:
+            indexer = self.db.indexer
             if self.db.indexer.query_language:
                 try:
-                    matches = self.db.indexer.search(
+                    matches = indexer.search(
                         [request.search_text], klass)
                 except Exception as e:
                     error = " ".join(e.args)
@@ -1444,8 +1445,10 @@
                     # trigger error reporting. NotFound isn't right but...
                     raise exceptions.NotFound(error)
             else:
-                matches = self.db.indexer.search(
-                    re.findall(r'\b\w{2,25}\b', request.search_text), klass)
+                matches = indexer.search(
+                    re.findall(r'\b\w{%s,%s}\b' % (indexer.minlength,
+                                                   indexer.maxlength),
+                               request.search_text), klass)
         else:
             matches = None
 
@@ -1609,9 +1612,10 @@
 
         # full-text search
         if request.search_text:
-            if self.db.indexer.query_language:
+            indexer = self.db.indexer
+            if indexer.query_language:
                 try:
-                    matches = self.db.indexer.search(
+                    matches = indexer.search(
                         [request.search_text], klass)
                 except Exception as e:
                     error = " ".join(e.args)
@@ -1620,8 +1624,10 @@
                     # trigger error reporting. NotFound isn't right but...
                     raise exceptions.NotFound(error)
             else:
-                matches = self.db.indexer.search(
-                    re.findall(r'\b\w{2,25}\b', request.search_text),
+                matches = indexer.search(
+                    re.findall(r'\b\w{%s,%s}\b' % (indexer.minlength,
+                                                   indexer.maxlength),
+                               request.search_text),
                     klass)
         else:
             matches = None
--- a/roundup/cgi/templating.py	Mon Jan 24 23:23:27 2022 -0500
+++ b/roundup/cgi/templating.py	Tue Jan 25 13:22:00 2022 -0500
@@ -3318,17 +3318,19 @@
         # get the list of ids we're batching over
         klass = self.client.db.getclass(self.classname)
         if self.search_text:
-            if self.client.db.indexer.query_language:
+            indexer = self.client.db.indexer
+            if indexer.query_language:
                 try:
-                    matches = self.client.db.indexer.search(
+                    matches = indexer.search(
                         [self.search_text], klass)
                 except Exception as e:
                     self.client.add_error_message(" ".join(e.args))
                     raise
             else:
-                matches = self.client.db.indexer.search(
+                matches = indexer.search(
                     [u2s(w.upper()) for w in re.findall(
-                        r'(?u)\b\w{2,25}\b',
+                        r'(?u)\b\w{%s,%s}\b' % (indexer.minlength,
+                                                indexer.maxlength),
                         s2u(self.search_text, "replace")
                     )], klass)
         else:
--- a/test/test_cgi.py	Mon Jan 24 23:23:27 2022 -0500
+++ b/test/test_cgi.py	Tue Jan 25 13:22:00 2022 -0500
@@ -33,6 +33,9 @@
 from . import db_test_base
 from .db_test_base import FormTestParent, setupTracker, FileUpload
 from .cmp_helper import StringFragmentCmpHelper
+from .test_postgresql import skip_postgresql
+from .test_mysql import skip_mysql
+
 
 class FileList:
     def __init__(self, name, *files):
@@ -42,6 +45,24 @@
         for f in self.files:
             yield (self.name, f)
 
+class testFtsQuery(object):
+
+    def testRenderContextFtsQuery(self):
+        self.db.issue.create(title='i1 is found', status="chatting")
+
+        self.client.form=db_test_base.makeForm(
+            { "@ok_message": "ok message", "@template": "index",
+            "@search_text": "found"})
+        self.client.path = 'issue'
+        self.client.determine_context()
+
+        result = self.client.renderContext()
+
+        expected = '">i1 is found</a>'
+
+        self.assertIn(expected, result)
+        self.assertEqual(self.client.response_code, 200)
+
 cm = client.add_message
 class MessageTestCase(unittest.TestCase):
     # Note: Escaping is now handled on a message-by-message basis at a
@@ -1976,7 +1997,7 @@
         self.assertRaises(exceptions.Unauthorised,
             actions.ExportCSVWithIdAction(cl).handle)
 
-class TemplateHtmlRendering(unittest.TestCase):
+class TemplateHtmlRendering(unittest.TestCase, testFtsQuery):
     ''' try to test the rendering code for tal '''
     def setUp(self):
         self.dirname = '_test_template'
@@ -2343,9 +2364,10 @@
         r = t.selectTemplate("user", "subdir/item")
         self.assertEqual("subdir/user.item", r)
 
-class SqliteCgiTest(unittest.TestCase):
+class SqliteNativeFtsCgiTest(unittest.TestCase, testFtsQuery):
     """All of the rest of the tests use anydbm as the backend.
-       This class tests renderError when renderContext fails.
+       In addition to normal fts test, this class tests renderError
+       when renderContext fails.
        Triggering this error requires the native-fts backend for
        the sqlite db.
     """
@@ -2407,5 +2429,138 @@
         self.assertEqual(result, expected)
         self.assertEqual(self.client.response_code, 400)
 
+class SqliteNativeCgiTest(unittest.TestCase, testFtsQuery):
+    """All of the rest of the tests use anydbm as the backend.
+       This class tests renderContext for fulltext search.
+       Run with sqlite and native indexer.
+    """
+
+    def setUp(self):
+        self.dirname = '_test_template'
+        # set up and open a tracker
+        self.instance = setupTracker(self.dirname, backend="sqlite")
+
+        self.instance.config.INDEXER = "native"
+
+        # open the database
+        self.db = self.instance.open('admin')
+        self.db.tx_Source = "web"
+
+        # create a client instance and hijack write_html
+        self.client = client.Client(self.instance, "user",
+                {'PATH_INFO':'/user', 'REQUEST_METHOD':'POST'},
+                form=db_test_base.makeForm({"@template": "item"}))
+
+        self.client._error_message = []
+        self.client._ok_message = []
+        self.client.db = self.db
+        self.client.userid = '1'
+        self.client.language = ('en',)
+        self.client.session_api = MockNull(_sid="1234567890")
+
+        self.output = []
+        # ugly hack to get html_write to return data here.
+        def html_write(s):
+            self.output.append(s)
+
+        # hijack html_write
+        self.client.write_html = html_write
+
+    def tearDown(self):
+        self.db.close()
+        try:
+            shutil.rmtree(self.dirname)
+        except OSError as error:
+            if error.errno not in (errno.ENOENT, errno.ESRCH): raise
+
+@skip_postgresql
+class PostgresqlNativeCgiTest(unittest.TestCase, testFtsQuery):
+    """All of the rest of the tests use anydbm as the backend.
+       This class tests renderContext for fulltext search.
+       Run with postgresql and native indexer.
+    """
+
+    def setUp(self):
+        self.dirname = '_test_template'
+        # set up and open a tracker
+        self.instance = setupTracker(self.dirname, backend="postgresql")
+
+        self.instance.config.INDEXER = "native"
+
+        # open the database
+        self.db = self.instance.open('admin')
+        self.db.tx_Source = "web"
+
+        # create a client instance and hijack write_html
+        self.client = client.Client(self.instance, "user",
+                {'PATH_INFO':'/user', 'REQUEST_METHOD':'POST'},
+                form=db_test_base.makeForm({"@template": "item"}))
+
+        self.client._error_message = []
+        self.client._ok_message = []
+        self.client.db = self.db
+        self.client.userid = '1'
+        self.client.language = ('en',)
+        self.client.session_api = MockNull(_sid="1234567890")
+
+        self.output = []
+        # ugly hack to get html_write to return data here.
+        def html_write(s):
+            self.output.append(s)
+
+        # hijack html_write
+        self.client.write_html = html_write
+
+    def tearDown(self):
+        self.db.close()
+        try:
+            shutil.rmtree(self.dirname)
+        except OSError as error:
+            if error.errno not in (errno.ENOENT, errno.ESRCH): raise
+
+@skip_mysql
+class MysqlNativeCgiTest(unittest.TestCase, testFtsQuery):
+    """All of the rest of the tests use anydbm as the backend.
+       This class tests renderContext for fulltext search.
+       Run with mysql and native indexer.
+    """
+
+    def setUp(self):
+        self.dirname = '_test_template'
+        # set up and open a tracker
+        self.instance = setupTracker(self.dirname, backend="mysql")
+
+        self.instance.config.INDEXER = "native"
+
+        # open the database
+        self.db = self.instance.open('admin')
+        self.db.tx_Source = "web"
+
+        # create a client instance and hijack write_html
+        self.client = client.Client(self.instance, "user",
+                {'PATH_INFO':'/user', 'REQUEST_METHOD':'POST'},
+                form=db_test_base.makeForm({"@template": "item"}))
+
+        self.client._error_message = []
+        self.client._ok_message = []
+        self.client.db = self.db
+        self.client.userid = '1'
+        self.client.language = ('en',)
+        self.client.session_api = MockNull(_sid="1234567890")
+
+        self.output = []
+        # ugly hack to get html_write to return data here.
+        def html_write(s):
+            self.output.append(s)
+
+        # hijack html_write
+        self.client.write_html = html_write
+
+    def tearDown(self):
+        self.db.close()
+        try:
+            shutil.rmtree(self.dirname)
+        except OSError as error:
+            if error.errno not in (errno.ENOENT, errno.ESRCH): raise
 
 # vim: set filetype=python sts=4 sw=4 et si :

Roundup Issue Tracker: http://roundup-tracker.org/