Mercurial > p > roundup > code
annotate roundup/backends/indexer_xapian.py @ 6478:a35d4e0c4e07
Fix permissions check. Use role not perm check.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Mon, 30 Aug 2021 16:17:46 -0400 |
| parents | c26b9ce33ae3 |
| children | 0b6c54893ec5 |
| rev | line source |
|---|---|
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
1 ''' This implements the full-text indexer using the Xapian indexer. |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
2 ''' |
|
5142
93832cec4c31
issue2550839: Xapian, DatabaseLockError: Unable to get write lock on
John Rouillard <rouilj@ieee.org>
parents:
5108
diff
changeset
|
3 import re, os, time |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
4 |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
5 import xapian |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
6 |
|
3544
5cd1c83dea50
Features and fixes.
Richard Jones <richard@users.sourceforge.net>
parents:
3295
diff
changeset
|
7 from roundup.backends.indexer_common import Indexer as IndexerBase |
|
5491
e72573996caf
fixed encoding issues for Xapian indexer
Christof Meerwald <cmeerw@cmeerw.org>
parents:
5142
diff
changeset
|
8 from roundup.anypy.strings import b2s, s2b |
|
6353
9d209d2b34ae
Add indexer_language to change stemmer for xapian FTS indexer
John Rouillard <rouilj@ieee.org>
parents:
5964
diff
changeset
|
9 from roundup.i18n import _ |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
10 |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
11 # TODO: we need to delete documents when a property is *reindexed* |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
12 |
|
5491
e72573996caf
fixed encoding issues for Xapian indexer
Christof Meerwald <cmeerw@cmeerw.org>
parents:
5142
diff
changeset
|
13 # Note that Xapian always uses UTF-8 encoded strings, see |
|
e72573996caf
fixed encoding issues for Xapian indexer
Christof Meerwald <cmeerw@cmeerw.org>
parents:
5142
diff
changeset
|
14 # https://xapian.org/docs/bindings/python3/introduction.html#strings: |
|
e72573996caf
fixed encoding issues for Xapian indexer
Christof Meerwald <cmeerw@cmeerw.org>
parents:
5142
diff
changeset
|
15 # "Where std::string is returned, it's always mapped to bytes in |
|
e72573996caf
fixed encoding issues for Xapian indexer
Christof Meerwald <cmeerw@cmeerw.org>
parents:
5142
diff
changeset
|
16 # Python..." |
|
e72573996caf
fixed encoding issues for Xapian indexer
Christof Meerwald <cmeerw@cmeerw.org>
parents:
5142
diff
changeset
|
17 |
|
3544
5cd1c83dea50
Features and fixes.
Richard Jones <richard@users.sourceforge.net>
parents:
3295
diff
changeset
|
18 class Indexer(IndexerBase): |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
19 def __init__(self, db): |
|
3544
5cd1c83dea50
Features and fixes.
Richard Jones <richard@users.sourceforge.net>
parents:
3295
diff
changeset
|
20 IndexerBase.__init__(self, db) |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
21 self.db_path = db.config.DATABASE |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
22 self.reindex = 0 |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
23 self.transaction_active = False |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
24 |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
25 def _get_database(self): |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
26 index = os.path.join(self.db_path, 'text-index') |
|
5142
93832cec4c31
issue2550839: Xapian, DatabaseLockError: Unable to get write lock on
John Rouillard <rouilj@ieee.org>
parents:
5108
diff
changeset
|
27 for n in range(10): |
|
93832cec4c31
issue2550839: Xapian, DatabaseLockError: Unable to get write lock on
John Rouillard <rouilj@ieee.org>
parents:
5108
diff
changeset
|
28 try: |
|
93832cec4c31
issue2550839: Xapian, DatabaseLockError: Unable to get write lock on
John Rouillard <rouilj@ieee.org>
parents:
5108
diff
changeset
|
29 # if successful return |
|
93832cec4c31
issue2550839: Xapian, DatabaseLockError: Unable to get write lock on
John Rouillard <rouilj@ieee.org>
parents:
5108
diff
changeset
|
30 return xapian.WritableDatabase(index, xapian.DB_CREATE_OR_OPEN) |
|
93832cec4c31
issue2550839: Xapian, DatabaseLockError: Unable to get write lock on
John Rouillard <rouilj@ieee.org>
parents:
5108
diff
changeset
|
31 except xapian.DatabaseLockError: |
|
93832cec4c31
issue2550839: Xapian, DatabaseLockError: Unable to get write lock on
John Rouillard <rouilj@ieee.org>
parents:
5108
diff
changeset
|
32 # adaptive sleep. Get longer as count increases. |
|
93832cec4c31
issue2550839: Xapian, DatabaseLockError: Unable to get write lock on
John Rouillard <rouilj@ieee.org>
parents:
5108
diff
changeset
|
33 time_to_sleep = 0.01 * (2 << min(5, n)) |
|
93832cec4c31
issue2550839: Xapian, DatabaseLockError: Unable to get write lock on
John Rouillard <rouilj@ieee.org>
parents:
5108
diff
changeset
|
34 time.sleep(time_to_sleep) |
|
93832cec4c31
issue2550839: Xapian, DatabaseLockError: Unable to get write lock on
John Rouillard <rouilj@ieee.org>
parents:
5108
diff
changeset
|
35 # we are back to the for loop |
|
93832cec4c31
issue2550839: Xapian, DatabaseLockError: Unable to get write lock on
John Rouillard <rouilj@ieee.org>
parents:
5108
diff
changeset
|
36 |
|
93832cec4c31
issue2550839: Xapian, DatabaseLockError: Unable to get write lock on
John Rouillard <rouilj@ieee.org>
parents:
5108
diff
changeset
|
37 # Get here only if we dropped out of the for loop. |
|
93832cec4c31
issue2550839: Xapian, DatabaseLockError: Unable to get write lock on
John Rouillard <rouilj@ieee.org>
parents:
5108
diff
changeset
|
38 raise xapian.DatabaseLockError("Unable to get lock after 10 retries on %s."%index) |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
39 |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
40 def save_index(self): |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
41 '''Save the changes to the index.''' |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
42 if not self.transaction_active: |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
43 return |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
44 database = self._get_database() |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
45 database.commit_transaction() |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
46 self.transaction_active = False |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
47 |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
48 def close(self): |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
49 '''close the indexing database''' |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
50 pass |
|
3887
c7363442cdbb
change xapian stemmer to use "new" API
Justus Pendleton <jpend@users.sourceforge.net>
parents:
3555
diff
changeset
|
51 |
|
3555
91c495476db3
pre-release stuff and test fix
Richard Jones <richard@users.sourceforge.net>
parents:
3547
diff
changeset
|
52 def rollback(self): |
|
91c495476db3
pre-release stuff and test fix
Richard Jones <richard@users.sourceforge.net>
parents:
3547
diff
changeset
|
53 if not self.transaction_active: |
|
91c495476db3
pre-release stuff and test fix
Richard Jones <richard@users.sourceforge.net>
parents:
3547
diff
changeset
|
54 return |
|
91c495476db3
pre-release stuff and test fix
Richard Jones <richard@users.sourceforge.net>
parents:
3547
diff
changeset
|
55 database = self._get_database() |
|
91c495476db3
pre-release stuff and test fix
Richard Jones <richard@users.sourceforge.net>
parents:
3547
diff
changeset
|
56 database.cancel_transaction() |
|
91c495476db3
pre-release stuff and test fix
Richard Jones <richard@users.sourceforge.net>
parents:
3547
diff
changeset
|
57 self.transaction_active = False |
|
91c495476db3
pre-release stuff and test fix
Richard Jones <richard@users.sourceforge.net>
parents:
3547
diff
changeset
|
58 |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
59 def force_reindex(self): |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
60 '''Force a reindexing of the database. This essentially |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
61 empties the tables ids and index and sets a flag so |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
62 that the databases are reindexed''' |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
63 self.reindex = 1 |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
64 |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
65 def should_reindex(self): |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
66 '''returns True if the indexes need to be rebuilt''' |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
67 return self.reindex |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
68 |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
69 def add_text(self, identifier, text, mime_type='text/plain'): |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
70 ''' "identifier" is (classname, itemid, property) ''' |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
71 if mime_type != 'text/plain': |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
72 return |
|
3547
7728ee93efd2
fix reindexing in Xapian
Richard Jones <richard@users.sourceforge.net>
parents:
3544
diff
changeset
|
73 if not text: text = '' |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
74 |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
75 # open the database and start a transaction if needed |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
76 database = self._get_database() |
|
4378
477f2a47cbca
- Indexer Xapian, made Xapian 1.2 compatible.
Bernhard Reiter <Bernhard.Reiter@intevation.de>
parents:
4252
diff
changeset
|
77 |
|
477f2a47cbca
- Indexer Xapian, made Xapian 1.2 compatible.
Bernhard Reiter <Bernhard.Reiter@intevation.de>
parents:
4252
diff
changeset
|
78 # XXX: Xapian now supports transactions, |
|
477f2a47cbca
- Indexer Xapian, made Xapian 1.2 compatible.
Bernhard Reiter <Bernhard.Reiter@intevation.de>
parents:
4252
diff
changeset
|
79 # but there is a call to save_index() missing. |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
80 #if not self.transaction_active: |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
81 #database.begin_transaction() |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
82 #self.transaction_active = True |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
83 |
|
6353
9d209d2b34ae
Add indexer_language to change stemmer for xapian FTS indexer
John Rouillard <rouilj@ieee.org>
parents:
5964
diff
changeset
|
84 stemmer = xapian.Stem(self.language) |
|
3547
7728ee93efd2
fix reindexing in Xapian
Richard Jones <richard@users.sourceforge.net>
parents:
3544
diff
changeset
|
85 |
|
7728ee93efd2
fix reindexing in Xapian
Richard Jones <richard@users.sourceforge.net>
parents:
3544
diff
changeset
|
86 # We use the identifier twice: once in the actual "text" being |
|
7728ee93efd2
fix reindexing in Xapian
Richard Jones <richard@users.sourceforge.net>
parents:
3544
diff
changeset
|
87 # indexed so we can search on it, and again as the "data" being |
|
7728ee93efd2
fix reindexing in Xapian
Richard Jones <richard@users.sourceforge.net>
parents:
3544
diff
changeset
|
88 # indexed so we know what we're matching when we get results |
|
5491
e72573996caf
fixed encoding issues for Xapian indexer
Christof Meerwald <cmeerw@cmeerw.org>
parents:
5142
diff
changeset
|
89 identifier = s2b('%s:%s:%s'%identifier) |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
90 |
|
3547
7728ee93efd2
fix reindexing in Xapian
Richard Jones <richard@users.sourceforge.net>
parents:
3544
diff
changeset
|
91 # create the new document |
|
7728ee93efd2
fix reindexing in Xapian
Richard Jones <richard@users.sourceforge.net>
parents:
3544
diff
changeset
|
92 doc = xapian.Document() |
|
7728ee93efd2
fix reindexing in Xapian
Richard Jones <richard@users.sourceforge.net>
parents:
3544
diff
changeset
|
93 doc.set_data(identifier) |
|
4511
931370d96c34
Xapian indexing improved:
Bernhard Reiter <Bernhard.Reiter@intevation.de>
parents:
4470
diff
changeset
|
94 doc.add_term(identifier, 0) |
|
3547
7728ee93efd2
fix reindexing in Xapian
Richard Jones <richard@users.sourceforge.net>
parents:
3544
diff
changeset
|
95 |
|
4252
2ff6f39aa391
Indexers behaviour made more consistent regarding length of indexed words...
Bernhard Reiter <Bernhard.Reiter@intevation.de>
parents:
3932
diff
changeset
|
96 for match in re.finditer(r'\b\w{%d,%d}\b' |
|
2ff6f39aa391
Indexers behaviour made more consistent regarding length of indexed words...
Bernhard Reiter <Bernhard.Reiter@intevation.de>
parents:
3932
diff
changeset
|
97 % (self.minlength, self.maxlength), |
|
5964
5bf7b5debb09
Fix xapian indexer for unicode
John Rouillard <rouilj@ieee.org>
parents:
5491
diff
changeset
|
98 text.upper(), re.UNICODE): |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
99 word = match.group(0) |
|
3544
5cd1c83dea50
Features and fixes.
Richard Jones <richard@users.sourceforge.net>
parents:
3295
diff
changeset
|
100 if self.is_stopword(word): |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
101 continue |
|
5491
e72573996caf
fixed encoding issues for Xapian indexer
Christof Meerwald <cmeerw@cmeerw.org>
parents:
5142
diff
changeset
|
102 term = stemmer(s2b(word.lower())) |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
103 doc.add_posting(term, match.start(0)) |
|
4511
931370d96c34
Xapian indexing improved:
Bernhard Reiter <Bernhard.Reiter@intevation.de>
parents:
4470
diff
changeset
|
104 |
|
931370d96c34
Xapian indexing improved:
Bernhard Reiter <Bernhard.Reiter@intevation.de>
parents:
4470
diff
changeset
|
105 database.replace_document(identifier, doc) |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
106 |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
107 def find(self, wordlist): |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
108 '''look up all the words in the wordlist. |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
109 If none are found return an empty dictionary |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
110 * more rules here |
|
3887
c7363442cdbb
change xapian stemmer to use "new" API
Justus Pendleton <jpend@users.sourceforge.net>
parents:
3555
diff
changeset
|
111 ''' |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
112 if not wordlist: |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
113 return {} |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
114 |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
115 database = self._get_database() |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
116 |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
117 enquire = xapian.Enquire(database) |
|
6353
9d209d2b34ae
Add indexer_language to change stemmer for xapian FTS indexer
John Rouillard <rouilj@ieee.org>
parents:
5964
diff
changeset
|
118 stemmer = xapian.Stem(self.language) |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
119 terms = [] |
|
4252
2ff6f39aa391
Indexers behaviour made more consistent regarding length of indexed words...
Bernhard Reiter <Bernhard.Reiter@intevation.de>
parents:
3932
diff
changeset
|
120 for term in [word.upper() for word in wordlist |
|
2ff6f39aa391
Indexers behaviour made more consistent regarding length of indexed words...
Bernhard Reiter <Bernhard.Reiter@intevation.de>
parents:
3932
diff
changeset
|
121 if self.minlength <= len(word) <= self.maxlength]: |
|
2ff6f39aa391
Indexers behaviour made more consistent regarding length of indexed words...
Bernhard Reiter <Bernhard.Reiter@intevation.de>
parents:
3932
diff
changeset
|
122 if not self.is_stopword(term): |
|
5491
e72573996caf
fixed encoding issues for Xapian indexer
Christof Meerwald <cmeerw@cmeerw.org>
parents:
5142
diff
changeset
|
123 terms.append(stemmer(s2b(term.lower()))) |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
124 query = xapian.Query(xapian.Query.OP_AND, terms) |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
125 |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
126 enquire.set_query(query) |
|
4841
3ff1a288fb9c
issue2550583, issue2550635 Do not limit results with Xapian indexer
Thomas Arendsen Hein <thomas@intevation.de>
parents:
4570
diff
changeset
|
127 matches = enquire.get_mset(0, database.get_doccount()) |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
128 |
|
5491
e72573996caf
fixed encoding issues for Xapian indexer
Christof Meerwald <cmeerw@cmeerw.org>
parents:
5142
diff
changeset
|
129 return [tuple(b2s(m.document.get_data()).split(':')) |
|
3295
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
130 for m in matches] |
|
a615cc230160
added Xapian indexer; replaces standard indexers if Xapian is available
Richard Jones <richard@users.sourceforge.net>
parents:
diff
changeset
|
131 |
