77See :func:`._query_all_fields` for information on how results are scored.
88"""
99
10- from typing import Any , List , Tuple , Callable , Dict
10+ from typing import Any , List , Tuple , Callable , Dict , Optional
1111from functools import reduce , wraps
1212from operator import ior , iand
1313import re
1919
2020from search .domain import SimpleQuery , Query , AdvancedQuery , Classification
2121from .util import strip_tex , Q_ , is_tex_query , is_literal_query , escape , \
22- wildcardEscape , remove_single_characters , has_wildcard
22+ wildcardEscape , remove_single_characters , has_wildcard , match_date_partial
2323from .highlighting import HIGHLIGHT_TAG_OPEN , HIGHLIGHT_TAG_CLOSE
2424from .authors import author_query , author_id_query , orcid_query
2525
@@ -85,6 +85,19 @@ def _query_doi(term: str, operator: str = 'and') -> Q:
8585
8686
8787def _query_primary (term : str , operator : str = 'and' ) -> Q :
88+ # In the 'or' case, we're basically just looking for hit highlighting
89+ # after a match on the combined field. Since primary classification fields
90+ # are keyword fields, they won't match the same way as the combined field
91+ # (text). So we have to be a bit fuzzy here to get the highlight.
92+ # TODO: in a future version, we should consider changes to the mappings
93+ # to make this more straightforward.
94+ if operator == 'or' :
95+ return reduce (ior , [(
96+ Q ("match" , ** {"primary_classification__category__id" : {"query" : part , "operator" : operator }})
97+ | Q ("wildcard" , ** {"primary_classification.category.name" : f"*{ part } *" })
98+ | Q ("match" , ** {"primary_classification__archive__id" : {"query" : part , "operator" : operator }})
99+ | Q ("wildcard" , ** {"primary_classification.archive.name" : f"*{ part } *" })
100+ ) for part in term .split ()])
88101 return (
89102 Q ("match" , ** {"primary_classification__category__id" : {"query" : term , "operator" : operator }})
90103 | Q ("match" , ** {"primary_classification__category__name" : {"query" : term , "operator" : operator }})
@@ -94,8 +107,21 @@ def _query_primary(term: str, operator: str = 'and') -> Q:
94107
95108
def _query_paper_id(term: str, operator: str = 'and') -> Q:
    """
    Build a query that matches ``term`` on either paper ID field.

    Parameters
    ----------
    term : str
        Search term; it is run through ``escape`` before being handed to
        each match query.
    operator : str
        Match operator. It is lower-cased here, so callers may pass e.g.
        ``'AND'`` or ``'or'``.

    Returns
    -------
    :class:`.Q`
        The ``paper_id`` match OR-ed with the ``paper_id_v`` match.

    """
    operator = operator.lower()
    logger.debug(f'query paper ID with: { term } ')
    # One match query per ID field, combined with a logical OR.
    return reduce(ior, [
        Q_('match', id_field, escape(term), operator=operator)
        for id_field in ('paper_id', 'paper_id_v')
    ])
115+
116+
def _query_combined(term: str) -> Q:
    """
    Build a ``query_string`` query for ``term`` on the ``combined`` field.

    The query uses ``AND`` as its default operator, so all terms must match
    in the combined field, and leading wildcards are disallowed.

    Parameters
    ----------
    term : str
        Search term. It is escaped and lower-cased before being used as the
        query string.

    Returns
    -------
    :class:`.Q`

    """
    # Only wildcards in literals should be escaped: prefer the output of
    # ``wildcardEscape`` when it reports a wildcard, and fall back to the
    # general ``escape`` otherwise.
    prepared, found_wildcard = wildcardEscape(term)
    if not found_wildcard:
        prepared = escape(term)

    return Q(
        "query_string",
        fields=['combined'],
        default_operator='AND',
        allow_leading_wildcard=False,
        query=prepared.lower(),
    )
99125
100126
101127def _query_all_fields (term : str ) -> Q :
@@ -144,20 +170,26 @@ def _query_all_fields(term: str) -> Q:
144170 if is_tex_query (term ):
145171 return _tex_query ('title' , term ) | _tex_query ('abstract' , term )
146172
147- # Only wildcards in literals should be escaped.
148- wildcard_escaped , has_wildcard = wildcardEscape (term )
149- query_term = wildcard_escaped if has_wildcard else escape (term )
150-
151- # All terms must match in the combined field.
152- _query = query_term .lower ()
153- match_all_fields = Q ("query_string" , fields = ['combined' ],
154- default_operator = 'AND' ,
155- allow_leading_wildcard = False ,
156- query = escape (_query ))
173+ date_partial : Optional [str ] = None
174+ remainder : Optional [str ] = None
175+ try :
176+ date_partial , remainder = match_date_partial (term )
177+ logger .debug (f'found date partial: { date_partial } ' )
178+ except ValueError :
179+ pass
180+ logger .debug (f'partial: { date_partial } ; rem: { remainder } ' )
181+
182+ match_all_fields = _query_combined (term )
183+ if date_partial :
184+ _q = Q ("term" , announced_date_first = date_partial )
185+ if remainder :
186+ _q &= _query_combined (remainder )
187+ match_all_fields |= _q
157188
158189 # We include matches of any term in any field, so that we can highlight
159190 # and score appropriately.
160191 queries = [
192+ _query_paper_id (term , operator = 'or' ),
161193 author_query (term , operator = 'OR' ),
162194 _query_title (term , default_operator = 'or' ),
163195 _query_abstract (term , default_operator = 'or' ),
@@ -171,7 +203,14 @@ def _query_all_fields(term: str) -> Q:
171203 _query_msc_class (term , operator = 'or' ),
172204 _query_primary (term , operator = 'or' )
173205 ]
206+
207+ if date_partial :
208+ queries .insert (0 , Q ("term" , announced_date_first = date_partial ))
209+
210+ # If the whole query matches on a specific field, we should consider that
211+ # responsive even if the query on the combined field does not respond.
174212 conj_queries = [
213+ _query_paper_id (term , operator = 'AND' ),
175214 author_query (term , operator = 'AND' ),
176215 _query_title (term , default_operator = 'and' ),
177216 _query_abstract (term , default_operator = 'and' ),
@@ -185,6 +224,7 @@ def _query_all_fields(term: str) -> Q:
185224 _query_msc_class (term , operator = 'and' ),
186225 _query_primary (term , operator = 'and' )
187226 ]
227+
188228 query = (match_all_fields | reduce (ior , conj_queries ))
189229 query &= Q ("bool" , should = queries ) # Partial matches across fields.
190230 scores = [SF ({'weight' : i + 1 , 'filter' : q })
0 commit comments