Mercurial > p > roundup > code
diff roundup/backends/indexer_whoosh.py @ 6034:cb76bb8bfffd
flake8 whitespace fixes.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Fri, 03 Jan 2020 21:24:09 -0500 |
| parents | 56c9bcdea47f |
| children |
line wrap: on
line diff
--- a/roundup/backends/indexer_whoosh.py Fri Jan 03 21:22:36 2020 -0500 +++ b/roundup/backends/indexer_whoosh.py Fri Jan 03 21:24:09 2020 -0500 @@ -1,12 +1,13 @@ ''' This implements the full-text indexer using Whoosh. ''' -import re, os +import os from whoosh import fields, qparser, index, query, analysis from roundup.backends.indexer_common import Indexer as IndexerBase from roundup.anypy.strings import us2u + class Indexer(IndexerBase): def __init__(self, db): IndexerBase.__init__(self, db) @@ -23,13 +24,13 @@ # StandardAnalyzer lowercases all words and configure it to # block stopwords and words with lengths not between # self.minlength and self.maxlength from indexer_common - stopfilter = analysis.StandardAnalyzer( #stoplist=self.stopwords, + stopfilter = analysis.StandardAnalyzer( #stoplist=self.stopwords, minsize=self.minlength, maxsize=self.maxlength) os.mkdir(path) schema = fields.Schema(identifier=fields.ID(stored=True, unique=True), - content=fields.TEXT(analyzer=stopfilter)) + content=fields.TEXT(analyzer=stopfilter)) index.create_in(path, schema) self.index = index.open_dir(path) return self.index @@ -84,7 +85,7 @@ # We use the identifier twice: once in the actual "text" being # indexed so we can search on it, and again as the "data" being # indexed so we know what we're matching when we get results - identifier = u"%s:%s:%s"%identifier + identifier = u"%s:%s:%s" % identifier # FIXME need to enhance this to handle the whoosh.store.LockError # that maybe raised if there is already another process with a lock. @@ -111,19 +112,18 @@ * more rules here ''' - wordlist = [ word for word in wordlist - if (self.minlength <= len(word) <= self.maxlength) and - not self.is_stopword(word.upper()) ] + wordlist = [word for word in wordlist + if (self.minlength <= len(word) <= self.maxlength) and + not self.is_stopword(word.upper())] if not wordlist: return {} searcher = self._get_searcher() - q = query.And([ query.FuzzyTerm("content", word.lower()) - for word in wordlist ]) + q = query.And([query.FuzzyTerm("content", word.lower()) + for word in wordlist]) results = searcher.search(q, limit=None) return [tuple(result["identifier"].split(':')) for result in results] -
