Skip to content

Commit ac29495

Browse files
authored
Merge pull request #207 from cul-it/develop
Search v0.4
2 parents e04c053 + 5926cfb commit ac29495

34 files changed

+826
-267
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,5 @@ ENV/
103103
src/
104104
temp/
105105
.DS_Store
106+
107+
to_index/

Dockerfile-agent

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# article metadata becomes available. Subscribes to a Kinesis stream for
55
# notifications about new metadata.
66

7-
FROM arxiv/search:0.3
7+
FROM arxiv/search:0.4
88

99
WORKDIR /opt/arxiv
1010

Dockerfile-elasticsearch

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# arxiv/elasticsearch
22
#
3-
# Runs Elasticsearch 6.1.1, with additional plugins.
3+
# Runs Elasticsearch 6.2.4, with additional plugins.
44
#
55
# To run, use the ``docker-compose.yml`` config in this directory to spin up
66
# alongside Kibana. Or:
@@ -12,7 +12,7 @@
1212
#
1313
# ES should be available on tcp://localhost:9200.
1414

15-
FROM docker.elastic.co/elasticsearch/elasticsearch:6.1.1
15+
FROM docker.elastic.co/elasticsearch/elasticsearch:6.2.4
1616

1717
# Install plugins.
1818
#

Dockerfile-kibana

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# arxiv/kibana
22
#
3-
# Runs Kibana 6.1.1. This Dockerfile is not strictly necessary, but it is here
3+
# Runs Kibana 6.2.4. This Dockerfile is not strictly necessary, but it is here
44
# in case we want to run any additional plugins.
55
#
66
# As of version 0.1, this is here for local development purposes only and is
@@ -10,4 +10,4 @@
1010
# alongside Elasticsearch.
1111

1212

13-
FROM docker.elastic.co/kibana/kibana:6.1.1
13+
FROM docker.elastic.co/kibana/kibana:6.2.4

bulk_index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ def populate(print_indexable: bool, paper_id: str, id_list: str,
4646
index_chunk_size = 250
4747
chunk: List[str] = []
4848
meta: List[DocMeta] = []
49-
49+
index.current_session().create_index()
5050
try:
5151
with click.progressbar(length=approx_size,
5252
label='Papers indexed') as index_bar:

search/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,8 +224,8 @@
224224
FLASKS3_ACTIVE = os.environ.get('FLASKS3_ACTIVE', 0)
225225

226226
# Settings for display of release information
227-
RELEASE_NOTES_URL = 'https://confluence.cornell.edu/x/mBtOFQ'
228-
RELEASE_NOTES_TEXT = 'Search v0.3 released 2018-05-14'
227+
RELEASE_NOTES_URL = 'https://confluence.cornell.edu/x/8H5OFQ'
228+
RELEASE_NOTES_TEXT = 'Search v0.4 released 2018-07-18'
229229

230230

231231
# TODO: one place to set the version, update release notes text, JIRA issue

search/controllers/advanced/__init__.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from typing import Tuple, Dict, Any, Optional
1111
import re
12-
from datetime import date, timedelta, datetime
12+
from datetime import date, datetime
1313
from dateutil.relativedelta import relativedelta
1414
from pytz import timezone
1515

@@ -60,6 +60,7 @@ def search(request_params: MultiDict) -> Response:
6060
InternalServerError
6161
Raised when there is an unrecoverable error while interacting with the
6262
search index.
63+
6364
"""
6465
# We may need to intervene on the request parameters, so we'll
6566
# reinstantiate as a mutable MultiDict.
@@ -133,7 +134,6 @@ def search(request_params: MultiDict) -> Response:
133134
else:
134135
logger.debug('form is invalid: %s', str(form.errors))
135136
if 'order' in form.errors or 'size' in form.errors:
136-
print(form.errors, form.data)
137137
# It's likely that the user tried to set these parameters
138138
# manually, or that the search originated from somewhere else
139139
# (and was configured incorrectly).
@@ -167,6 +167,7 @@ def _query_from_form(form: forms.AdvancedSearchForm) -> AdvancedQuery:
167167
Returns
168168
-------
169169
:class:`.AdvancedQuery`
170+
170171
"""
171172
q = AdvancedQuery()
172173
q = _update_query_with_dates(q, form.date.data)
@@ -177,6 +178,7 @@ def _query_from_form(form: forms.AdvancedSearchForm) -> AdvancedQuery:
177178
order = form.order.data
178179
if order and order != 'None':
179180
q.order = order
181+
q.hide_abstracts = form.abstracts.data == form.HIDE_ABSTRACTS
180182
return q
181183

182184

@@ -213,7 +215,8 @@ def _update_query_with_classification(q: AdvancedQuery, data: MultiDict) \
213215
def _update_query_with_terms(q: AdvancedQuery, terms_data: list) \
214216
-> AdvancedQuery:
215217
q.terms = FieldedSearchList([
216-
FieldedSearchTerm(**term) for term in terms_data if term['term'] # type: ignore
218+
FieldedSearchTerm(**term) # type: ignore
219+
for term in terms_data if term['term']
217220
])
218221
return q
219222

@@ -256,6 +259,9 @@ def _update_query_with_dates(q: AdvancedQuery, date_data: MultiDict) \
256259
start_date=date_data['from_date'],
257260
end_date=date_data['to_date'],
258261
)
262+
263+
if q.date_range:
264+
q.date_range.date_type = date_data['date_type']
259265
return q
260266

261267

search/controllers/advanced/forms.py

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
"""Provides form rendering and validation for the advanced search feature."""
22

3+
import calendar
4+
import re
35
from datetime import date, datetime
46
from typing import Callable, Optional, List, Any
57

@@ -11,8 +13,9 @@
1113
from wtforms import widgets
1214

1315
from arxiv import taxonomy
14-
15-
from search.controllers.util import doesNotStartWithWildcard, stripWhiteSpace
16+
from search.domain import DateRange
17+
from search.controllers.util import does_not_start_with_wildcard, \
18+
strip_white_space, has_balanced_quotes
1619

1720

1821
class MultiFormatDateField(DateField):
@@ -21,10 +24,12 @@ class MultiFormatDateField(DateField):
2124
def __init__(self, label: Optional[str] = None,
2225
validators: Optional[List[Callable]] = None,
2326
formats: List[str] = ['%Y-%m-%d %H:%M:%S'],
27+
default_upper_bound: bool = False,
2428
**kwargs: Any) -> None:
2529
"""Override to change ``format: str`` to ``formats: List[str]``."""
2630
super(DateField, self).__init__(label, validators, **kwargs)
2731
self.formats = formats
32+
self.default_upper_bound = default_upper_bound
2833

2934
def _value(self) -> str:
3035
if self.raw_data:
@@ -39,7 +44,17 @@ def process_formdata(self, valuelist: List[str]) -> None:
3944
self.data: Optional[date]
4045
for fmt in self.formats:
4146
try:
42-
self.data = datetime.strptime(date_str, fmt).date()
47+
adj_date = datetime.strptime(date_str, fmt).date()
48+
if self.default_upper_bound:
49+
if not re.search(r'%[Bbm]', fmt):
50+
# when month does not appear in matching format
51+
adj_date = adj_date.replace(month=12, day=31)
52+
elif not re.search('%d', fmt):
53+
# when day does not appear in matching format
54+
last_day = calendar.monthrange(adj_date.year,
55+
adj_date.month)[1]
56+
adj_date = adj_date.replace(day=last_day)
57+
self.data = adj_date
4358
return
4459
except ValueError:
4560
continue
@@ -52,8 +67,9 @@ class FieldForm(Form):
5267

5368
# pylint: disable=too-few-public-methods
5469

55-
term = StringField("Search term...", filters=[stripWhiteSpace],
56-
validators=[doesNotStartWithWildcard])
70+
term = StringField("Search term...", filters=[strip_white_space],
71+
validators=[does_not_start_with_wildcard,
72+
has_balanced_quotes])
5773
operator = SelectField("Operator", choices=[
5874
('AND', 'AND'), ('OR', 'OR'), ('NOT', 'NOT')
5975
], default='AND')
@@ -115,7 +131,8 @@ def yearInBounds(form: Form, field: DateField) -> None:
115131
return None
116132

117133
start_of_time = date(year=1991, month=1, day=1)
118-
if field.data < start_of_time or field.data > date.today():
134+
upper_limit = date.today().replace(year=date.today().year + 1)
135+
if field.data < start_of_time or field.data > upper_limit:
119136
raise ValidationError('Not a valid publication year')
120137

121138

@@ -146,9 +163,24 @@ class DateForm(Form):
146163
to_date = MultiFormatDateField(
147164
'to',
148165
validators=[validators.Optional(), yearInBounds],
149-
formats=['%Y-%m-%d', '%Y-%m', '%Y']
166+
formats=['%Y-%m-%d', '%Y-%m', '%Y'],
167+
default_upper_bound=True
150168
)
151169

170+
SUBMITTED_ORIGINAL = DateRange.SUBMITTED_ORIGINAL
171+
SUBMITTED_CURRENT = DateRange.SUBMITTED_CURRENT
172+
ANNOUNCED = DateRange.ANNOUNCED
173+
DATE_TYPE_CHOICES = [
174+
(SUBMITTED_CURRENT, 'Submission date (most recent)'),
175+
(SUBMITTED_ORIGINAL, 'Submission date (original)'),
176+
(ANNOUNCED, 'Announcement date'),
177+
]
178+
date_type = RadioField('Apply to', choices=DATE_TYPE_CHOICES,
179+
default=SUBMITTED_CURRENT,
180+
description="You may filter on either submission"
181+
" date or announcement date. Note that announcement"
182+
" date supports only year and month granularity.")
183+
152184
def validate_filter_by(self, field: RadioField) -> None:
153185
"""Ensure that related fields are filled."""
154186
if field.data == 'specific_year' and not self.data.get('year'):
@@ -175,6 +207,7 @@ class AdvancedSearchForm(Form):
175207
classification = FormField(ClassificationForm)
176208
date = FormField(DateForm)
177209
size = SelectField('results per page', default=50, choices=[
210+
('25', '25'),
178211
('50', '50'),
179212
('100', '100'),
180213
('200', '200')
@@ -187,3 +220,11 @@ class AdvancedSearchForm(Form):
187220
('', 'Relevance')
188221
], validators=[validators.Optional()], default='-announced_date_first')
189222
include_older_versions = BooleanField('Include older versions of papers')
223+
224+
HIDE_ABSTRACTS = 'hide'
225+
SHOW_ABSTRACTS = 'show'
226+
227+
abstracts = RadioField('Abstracts', choices=[
228+
(SHOW_ABSTRACTS, 'Show abstracts'),
229+
(HIDE_ABSTRACTS, 'Hide abstracts')
230+
], default=SHOW_ABSTRACTS)

search/controllers/advanced/tests.py

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,43 @@ def test_date_range_must_be_specified(self):
290290
form = AdvancedSearchForm(data)
291291
self.assertTrue(form.validate())
292292

293+
# ARXIVNG-997
294+
def test_end_date_bounding(self):
295+
"""If a user selects an end date, it must be bounded correctly."""
296+
data = MultiDict({
297+
'terms-0-operator': 'AND',
298+
'terms-0-field': 'title',
299+
'terms-0-term': 'foo',
300+
'date-filter_by': 'date_range',
301+
'date-to_date': '2012'
302+
})
303+
form = AdvancedSearchForm(data)
304+
self.assertTrue(form.validate())
305+
self.assertEqual(form.date.to_date.data,
306+
date(year=2012, month=12, day=31))
307+
308+
data['date-to_date'] = '2012-02'
309+
form = AdvancedSearchForm(data)
310+
self.assertTrue(form.validate())
311+
self.assertEqual(form.date.to_date.data,
312+
date(year=2012, month=2, day=29))
313+
314+
data['date-to_date'] = '2016-06'
315+
form = AdvancedSearchForm(data)
316+
self.assertTrue(form.validate())
317+
self.assertEqual(form.date.to_date.data,
318+
date(year=2016, month=6, day=30))
319+
320+
data['date-to_date'] = '2016-06-30'
321+
form = AdvancedSearchForm(data)
322+
self.assertTrue(form.validate())
323+
self.assertEqual(form.date.to_date.data,
324+
date(year=2016, month=6, day=30))
325+
326+
data['date-to_date'] = '2100-02'
327+
form = AdvancedSearchForm(data)
328+
self.assertFalse(form.validate())
329+
293330
def test_year_must_be_after_1990(self):
294331
"""If the user selects a specific year, it must be after 1990."""
295332
data = MultiDict({
@@ -324,6 +361,28 @@ def test_input_whitespace_is_stripped(self):
324361
self.assertEqual(form.terms[0].term.data, 'foo',
325362
"Whitespace should be stripped.")
326363

364+
def test_querystring_has_unbalanced_quotes(self):
365+
"""Querystring has an odd number of quote characters."""
366+
data = MultiDict({
367+
'terms-0-operator': 'AND',
368+
'terms-0-field': 'title',
369+
'terms-0-term': '"rhubarb'
370+
})
371+
form = AdvancedSearchForm(data)
372+
self.assertFalse(form.validate(), "Form should be invalid")
373+
374+
data['terms-0-term'] = '"rhubarb"'
375+
form = AdvancedSearchForm(data)
376+
self.assertTrue(form.validate(), "Form should be valid")
377+
378+
data['terms-0-term'] = '"rhubarb" "pie'
379+
form = AdvancedSearchForm(data)
380+
self.assertFalse(form.validate(), "Form should be invalid")
381+
382+
data['terms-0-term'] = '"rhubarb" "pie"'
383+
form = AdvancedSearchForm(data)
384+
self.assertTrue(form.validate(), "Form should be valid")
385+
327386

328387
class TestUpdatequeryWithClassification(TestCase):
329388
""":func:`.advanced._update_query_with_classification` adds classfnxn."""
@@ -436,7 +495,7 @@ class TestUpdateQueryWithDates(TestCase):
436495

437496
def test_past_12_is_selected(self):
438497
"""Query selects the past twelve months."""
439-
date_data = {'filter_by': 'past_12'}
498+
date_data = {'filter_by': 'past_12', 'date_type': 'submitted_date'}
440499
q = advanced._update_query_with_dates(Query(), date_data)
441500
self.assertIsInstance(q, Query)
442501
self.assertIsInstance(q.date_range, DateRange)
@@ -449,7 +508,7 @@ def test_past_12_is_selected(self):
449508

450509
def test_all_dates_is_selected(self):
451510
"""Query does not select on date."""
452-
date_data = {'filter_by': 'all_dates'}
511+
date_data = {'filter_by': 'all_dates', 'date_type': 'submitted_date'}
453512
q = advanced._update_query_with_dates(AdvancedQuery(), date_data)
454513
self.assertIsInstance(q, AdvancedQuery)
455514
self.assertIsNone(q.date_range)
@@ -458,7 +517,8 @@ def test_specific_year_is_selected(self):
458517
"""Start and end dates are set, one year apart."""
459518
date_data = {
460519
'filter_by': 'specific_year',
461-
'year': date(year=1999, month=1, day=1)
520+
'year': date(year=1999, month=1, day=1),
521+
'date_type': 'submitted_date'
462522
}
463523
q = advanced._update_query_with_dates(AdvancedQuery(), date_data)
464524
self.assertIsInstance(q, AdvancedQuery)
@@ -475,6 +535,7 @@ def test_date_range_is_selected(self):
475535
'filter_by': 'date_range',
476536
'from_date': from_date,
477537
'to_date': to_date,
538+
'date_type': 'submitted_date'
478539
}
479540
q = advanced._update_query_with_dates(AdvancedQuery(), date_data)
480541
self.assertIsInstance(q, AdvancedQuery)

0 commit comments

Comments
 (0)