diff roundup/backends/indexer_xapian.py @ 6353:9d209d2b34ae

Add indexer_language to change the stemmer for the xapian FTS indexer. Nagy Gabor asked how to enable the Hungarian stemmer in roundup. This required editing indexer_xapian.py to replace the hardcoded "english" term. This value is now exposed in the config file under [main] indexer_language. Currently this only works for the xapian indexer.
author John Rouillard <rouilj@ieee.org>
date Sun, 28 Mar 2021 23:34:43 -0400
parents 5bf7b5debb09
children c26b9ce33ae3
line wrap: on
line diff
--- a/roundup/backends/indexer_xapian.py	Sat Mar 27 13:05:50 2021 -0400
+++ b/roundup/backends/indexer_xapian.py	Sun Mar 28 23:34:43 2021 -0400
@@ -6,6 +6,7 @@
 
 from roundup.backends.indexer_common import Indexer as IndexerBase
 from roundup.anypy.strings import b2s, s2b
+from roundup.i18n import _
 
 # TODO: we need to delete documents when a property is *reindexed*
 
@@ -21,6 +22,18 @@
         self.reindex = 0
         self.transaction_active = False
 
+        # self.language defined in IndexerBase.__init__
+        # validate it here
+        try:
+            xapian.Stem(self.language)
+        except xapian.InvalidArgumentError:
+            raise ValueError(
+                _("Invalid indexer_language %(lang)s for xapian indexer\n"
+                  "Valid languages: %(valid)s") % {
+                      "lang": self.language,
+                      "valid": b2s(xapian.Stem.get_available_languages()) }
+            )
+
     def _get_database(self):
         index = os.path.join(self.db_path, 'text-index')
         for n in range(10):
@@ -80,8 +93,7 @@
             #database.begin_transaction()
             #self.transaction_active = True
 
-        # TODO: allow configuration of other languages
-        stemmer = xapian.Stem("english")
+        stemmer = xapian.Stem(self.language)
 
         # We use the identifier twice: once in the actual "text" being
         # indexed so we can search on it, and again as the "data" being
@@ -115,7 +127,7 @@
         database = self._get_database()
 
         enquire = xapian.Enquire(database)
-        stemmer = xapian.Stem("english")
+        stemmer = xapian.Stem(self.language)
         terms = []
         for term in [word.upper() for word in wordlist
                           if self.minlength <= len(word) <= self.maxlength]:

Roundup Issue Tracker: http://roundup-tracker.org/