Mercurial > p > roundup > code
diff roundup/backends/indexer_xapian.py @ 6353:9d209d2b34ae
Add indexer_language to change stemmer for xapian FTS indexer
Nagy Gabor asked how to enable the Hungarian stemmer in Roundup. This
required editing indexer_xapian.py to replace the hardcoded "english"
term. This value is now exposed in the config file under [main]
as indexer_language.
This currently only works for the Xapian indexer.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Sun, 28 Mar 2021 23:34:43 -0400 |
| parents | 5bf7b5debb09 |
| children | c26b9ce33ae3 |
line wrap: on
line diff
--- a/roundup/backends/indexer_xapian.py Sat Mar 27 13:05:50 2021 -0400 +++ b/roundup/backends/indexer_xapian.py Sun Mar 28 23:34:43 2021 -0400 @@ -6,6 +6,7 @@ from roundup.backends.indexer_common import Indexer as IndexerBase from roundup.anypy.strings import b2s, s2b +from roundup.i18n import _ # TODO: we need to delete documents when a property is *reindexed* @@ -21,6 +22,18 @@ self.reindex = 0 self.transaction_active = False + # self.language defined in IndexerBase.__init__ + # validate it here + try: + xapian.Stem(self.language) + except xapian.InvalidArgumentError: + raise ValueError( + _("Invalid indexer_language %(lang)s for xapian indexer\n" + "Valid languages: %(valid)s") % { + "lang": self.language, + "valid": b2s(xapian.Stem.get_available_languages()) } + ) + def _get_database(self): index = os.path.join(self.db_path, 'text-index') for n in range(10): @@ -80,8 +93,7 @@ #database.begin_transaction() #self.transaction_active = True - # TODO: allow configuration of other languages - stemmer = xapian.Stem("english") + stemmer = xapian.Stem(self.language) # We use the identifier twice: once in the actual "text" being # indexed so we can search on it, and again as the "data" being @@ -115,7 +127,7 @@ database = self._get_database() enquire = xapian.Enquire(database) - stemmer = xapian.Stem("english") + stemmer = xapian.Stem(self.language) terms = [] for term in [word.upper() for word in wordlist if self.minlength <= len(word) <= self.maxlength]:
