Mercurial > p > roundup > code
annotate roundup/backends/indexer_whoosh.py @ 5746:fea2b6e54492
Check /rest/data.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Wed, 29 May 2019 22:56:50 -0400 |
| parents | 56c9bcdea47f |
| children | cb76bb8bfffd |
| rev | line source |
|---|---|
|
5096
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
1 ''' This implements the full-text indexer using Whoosh. |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
2 ''' |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
3 import re, os |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
4 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
5 from whoosh import fields, qparser, index, query, analysis |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
6 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
7 from roundup.backends.indexer_common import Indexer as IndexerBase |
|
5416
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
5096
diff
changeset
|
8 from roundup.anypy.strings import us2u |
|
5096
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
9 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
10 class Indexer(IndexerBase): |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
11 def __init__(self, db): |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
12 IndexerBase.__init__(self, db) |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
13 self.db_path = db.config.DATABASE |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
14 self.reindex = 0 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
15 self.writer = None |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
16 self.index = None |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
17 self.deleted = set() |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
18 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
19 def _get_index(self): |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
20 if self.index is None: |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
21 path = os.path.join(self.db_path, 'whoosh-index') |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
22 if not os.path.exists(path): |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
23 # StandardAnalyzer lowercases all words and configure it to |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
24 # block stopwords and words with lengths not between |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
25 # self.minlength and self.maxlength from indexer_common |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
26 stopfilter = analysis.StandardAnalyzer( #stoplist=self.stopwords, |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
27 minsize=self.minlength, |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
28 maxsize=self.maxlength) |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
29 os.mkdir(path) |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
30 schema = fields.Schema(identifier=fields.ID(stored=True, |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
31 unique=True), |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
32 content=fields.TEXT(analyzer=stopfilter)) |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
33 index.create_in(path, schema) |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
34 self.index = index.open_dir(path) |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
35 return self.index |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
36 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
37 def save_index(self): |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
38 '''Save the changes to the index.''' |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
39 if not self.writer: |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
40 return |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
41 self.writer.commit() |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
42 self.deleted = set() |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
43 self.writer = None |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
44 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
45 def close(self): |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
46 '''close the indexing database''' |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
47 pass |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
48 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
49 def rollback(self): |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
50 if not self.writer: |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
51 return |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
52 self.writer.cancel() |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
53 self.deleted = set() |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
54 self.writer = None |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
55 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
56 def force_reindex(self): |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
57 '''Force a reindexing of the database. This essentially |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
58 empties the tables ids and index and sets a flag so |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
59 that the databases are reindexed''' |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
60 self.reindex = 1 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
61 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
62 def should_reindex(self): |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
63 '''returns True if the indexes need to be rebuilt''' |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
64 return self.reindex |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
65 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
66 def _get_writer(self): |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
67 if self.writer is None: |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
68 self.writer = self._get_index().writer() |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
69 return self.writer |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
70 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
71 def _get_searcher(self): |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
72 return self._get_index().searcher() |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
73 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
74 def add_text(self, identifier, text, mime_type='text/plain'): |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
75 ''' "identifier" is (classname, itemid, property) ''' |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
76 if mime_type != 'text/plain': |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
77 return |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
78 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
79 if not text: |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
80 text = u'' |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
81 |
|
5416
56c9bcdea47f
Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents:
5096
diff
changeset
|
82 text = us2u(text, "replace") |
|
5096
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
83 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
84 # We use the identifier twice: once in the actual "text" being |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
85 # indexed so we can search on it, and again as the "data" being |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
86 # indexed so we know what we're matching when we get results |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
87 identifier = u"%s:%s:%s"%identifier |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
88 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
89 # FIXME need to enhance this to handle the whoosh.store.LockError |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
90 # that maybe raised if there is already another process with a lock. |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
91 writer = self._get_writer() |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
92 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
93 # Whoosh gets upset if a document is deleted twice in one transaction, |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
94 # so we keep a list of the documents we have so far deleted to make |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
95 # sure that we only delete them once. |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
96 if identifier not in self.deleted: |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
97 searcher = self._get_searcher() |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
98 results = searcher.search(query.Term("identifier", identifier)) |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
99 if len(results) > 0: |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
100 writer.delete_by_term("identifier", identifier) |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
101 self.deleted.add(identifier) |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
102 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
103 # Note: use '.lower()' because it seems like Whoosh gets |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
104 # better results that way. |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
105 writer.add_document(identifier=identifier, content=text) |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
106 self.save_index() |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
107 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
108 def find(self, wordlist): |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
109 '''look up all the words in the wordlist. |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
110 If none are found return an empty dictionary |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
111 * more rules here |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
112 ''' |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
113 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
114 wordlist = [ word for word in wordlist |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
115 if (self.minlength <= len(word) <= self.maxlength) and |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
116 not self.is_stopword(word.upper()) ] |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
117 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
118 if not wordlist: |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
119 return {} |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
120 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
121 searcher = self._get_searcher() |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
122 q = query.And([ query.FuzzyTerm("content", word.lower()) |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
123 for word in wordlist ]) |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
124 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
125 results = searcher.search(q, limit=None) |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
126 |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
127 return [tuple(result["identifier"].split(':')) |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
128 for result in results] |
|
e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
John Rouillard <rouilj@ieee.org>
parents:
diff
changeset
|
129 |
