forked from liangliangyy/DjangoBlog
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathelasticsearch_backend.py
More file actions
158 lines (122 loc) · 4.67 KB
/
elasticsearch_backend.py
File metadata and controls
158 lines (122 loc) · 4.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/usr/bin/env python
# encoding: utf-8
"""
@version: ??
@author: liangliangyy
@license: MIT Licence
@contact: liangliangyy@gmail.com
@site: https://www.lylinux.net/
@software: PyCharm
@file: elasticsearch_backend.py
@time: 2019-04-13 11:46
"""
import logging
import re
from django.utils.encoding import force_text
from elasticsearch_dsl import Q
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, EmptyResults, log_query
from haystack.models import SearchResult
from haystack.utils import log as logging
from blog.models import Article
from blog.documents import ArticleDocument, ArticleDocumentManager
logger = logging.getLogger(__name__)
class ElasticSearchBackend(BaseSearchBackend):
def __init__(self, connection_alias, **connection_options):
super(ElasticSearchBackend, self).__init__(connection_alias, **connection_options)
self.manager = ArticleDocumentManager()
# try:
# self._rebuild(None)
# except:
# pass
def _get_models(self, iterable):
models = iterable if iterable else Article.objects.all()
docs = self.manager.convert_to_doc(models)
return docs
def _create(self, models):
self.manager.create_index()
docs = self._get_models(models)
self.manager.rebuild(docs)
def _delete(self, models):
for m in models:
m.delete()
return True
def _rebuild(self, models):
models = models if models else Article.objects.all()
docs = self.manager.convert_to_doc(models)
self.manager.update_docs(docs)
def update(self, index, iterable, commit=True):
models = self._get_models(iterable)
self.manager.update_docs(models)
def remove(self, obj_or_string):
models = self._get_models([obj_or_string])
self._delete(models)
def clear(self, models=None, commit=True):
self.remove(None)
@log_query
def search(self, query_string, **kwargs):
logger.info('search query_string:' + query_string)
start_offset = kwargs.get('start_offset')
end_offset = kwargs.get('end_offset')
q = Q('bool',
should=[Q('match', body=query_string), Q('match', title=query_string)],
minimum_should_match="70%"
)
search = ArticleDocument.search() \
.query('bool', filter=[q]) \
.filter('term', status='p') \
.filter('term', type='a') \
.source(False)[start_offset: end_offset]
results = search.execute()
hits = results['hits'].total
raw_results = []
for raw_result in results['hits']['hits']:
app_label = 'blog'
model_name = 'Article'
additional_fields = {}
# if 'highlight' in raw_result:
# additional_fields['highlighted'] = raw_result['highlight'].get(content_field, '')
result_class = SearchResult
result = result_class(app_label, model_name, raw_result['_id'], raw_result['_score'],
**additional_fields)
raw_results.append(result)
facets = {}
spelling_suggestion = None
return {
'results': raw_results,
'hits': hits,
'facets': facets,
'spelling_suggestion': spelling_suggestion,
}
class ElasticSearchQuery(BaseSearchQuery):
def _convert_datetime(self, date):
if hasattr(date, 'hour'):
return force_text(date.strftime('%Y%m%d%H%M%S'))
else:
return force_text(date.strftime('%Y%m%d000000'))
def clean(self, query_fragment):
"""
Provides a mechanism for sanitizing user input before presenting the
value to the backend.
Whoosh 1.X differs here in that you can no longer use a backslash
to escape reserved characters. Instead, the whole word should be
quoted.
"""
words = query_fragment.split()
cleaned_words = []
for word in words:
if word in self.backend.RESERVED_WORDS:
word = word.replace(word, word.lower())
for char in self.backend.RESERVED_CHARACTERS:
if char in word:
word = "'%s'" % word
break
cleaned_words.append(word)
return ' '.join(cleaned_words)
def build_query_fragment(self, field, filter_type, value):
return value.query_string
def get_count(self):
results = self.get_results()
return len(results) if results else 0
class ElasticSearchEngine(BaseEngine):
backend = ElasticSearchBackend
query = ElasticSearchQuery