Skip to content

Commit 4d42258

Browse files
committed
Support exhaust cursor flag PYTHON-265
1 parent 40a32cc commit 4d42258

File tree

9 files changed

+244
-84
lines changed

9 files changed

+244
-84
lines changed

doc/api/pymongo/collection.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
.. automethod:: update(spec, document[, upsert=False[, manipulate=False[, safe=None[, multi=False[, check_keys=True[, **kwargs]]]]]])
3434
.. automethod:: remove([spec_or_id=None[, safe=None[, **kwargs]]])
3535
.. automethod:: drop
36-
.. automethod:: find([spec=None[, fields=None[, skip=0[, limit=0[, timeout=True[, snapshot=False[, tailable=False[, sort=None[, max_scan=None[, as_class=None[, slave_okay=False[, await_data=False[, partial=False[, manipulate=True[, read_preference=ReadPreference.PRIMARY[, **kwargs]]]]]]]]]]]]]]]])
36+
.. automethod:: find([spec=None[, fields=None[, skip=0[, limit=0[, timeout=True[, snapshot=False[, tailable=False[, sort=None[, max_scan=None[, as_class=None[, slave_okay=False[, await_data=False[, partial=False[, manipulate=True[, read_preference=ReadPreference.PRIMARY[, exhaust=False[, **kwargs]]]]]]]]]]]]]]]]])
3737
.. automethod:: find_one([spec_or_id=None[, *args[, **kwargs]]])
3838
.. automethod:: count
3939
.. automethod:: create_index

pymongo/collection.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -679,6 +679,32 @@ def find(self, *args, **kwargs):
679679
the nearest member may accept reads. Default 15 milliseconds.
680680
**Ignored by mongos** and must be configured on the command line.
681681
See the localThreshold_ option for more information.
682+
- `exhaust` (optional): If ``True`` create an "exhaust" cursor.
683+
MongoDB will stream batched results to the client without waiting
684+
for the client to request each batch, reducing latency.
685+
686+
.. note:: There are a number of caveats to using the `exhaust`
687+
parameter:
688+
689+
1. The `exhaust` and `limit` options are incompatible and can
690+
not be used together.
691+
692+
2. The `exhaust` option is not supported by mongos and can not be
693+
used with a sharded cluster.
694+
695+
3. A :class:`~pymongo.cursor.Cursor` instance created with the
696+
`exhaust` option requires an exclusive :class:`~socket.socket`
697+
connection to MongoDB. If the :class:`~pymongo.cursor.Cursor` is
698+
discarded without being completely iterated the underlying
699+
:class:`~socket.socket` connection will be closed and discarded
700+
without being returned to the connection pool.
701+
702+
4. A :class:`~pymongo.cursor.Cursor` instance created with the
703+
`exhaust` option in a :doc:`request </examples/requests>` **must**
704+
be completely iterated before executing any other operation.
705+
706+
5. The `network_timeout` option is ignored when using the
707+
`exhaust` option.
682708
683709
.. note:: The `manipulate` parameter may default to False in
684710
a future release.

pymongo/cursor.py

Lines changed: 106 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,28 @@
3434
"partial": 128}
3535

3636

37+
# This has to be an old style class due to
# http://bugs.jython.org/issue1057
class _SocketManager:
    """Used with exhaust cursors to ensure the socket is returned.

    Holds a socket together with the pool it was taken from and hands the
    socket back to that pool at most once: either when :meth:`close` is
    called explicitly or, failing that, when this object is finalized.

    :Parameters:
      - `sock`: the socket object to hand back to `pool`
      - `pool`: an object providing ``maybe_return_socket(sock)``
    """
    def __init__(self, sock, pool):
        self.sock = sock
        self.pool = pool
        self.__closed = False

    def __del__(self):
        # Fallback for managers that were never closed explicitly.
        self.close()

    def close(self):
        """Return this instance's socket to the connection pool.

        Only the first call has any effect; later calls are no-ops.
        Any exception raised by the pool propagates to the caller, but
        the references to the socket and the pool are dropped regardless.
        """
        if not self.__closed:
            # Flag first so a failure below can never cause the same
            # socket to be offered to the pool a second time.
            self.__closed = True
            try:
                self.pool.maybe_return_socket(self.sock)
            finally:
                # Release the references even when the pool raises, so a
                # failed return does not keep the socket and pool alive
                # for as long as this manager is reachable.
                self.sock, self.pool = None, None
57+
58+
3759
# TODO might be cool to be able to do find().include("foo") or
3860
# find().exclude(["bar", "baz"]) or find().slice("a", 1, 2) as an
3961
# alternative to the fields specifier.
@@ -46,7 +68,7 @@ def __init__(self, collection, spec=None, fields=None, skip=0, limit=0,
4668
max_scan=None, as_class=None, slave_okay=False,
4769
await_data=False, partial=False, manipulate=True,
4870
read_preference=ReadPreference.PRIMARY, tag_sets=[{}],
49-
secondary_acceptable_latency_ms=None,
71+
secondary_acceptable_latency_ms=None, exhaust=False,
5072
_must_use_master=False, _uuid_subtype=None, **kwargs):
5173
"""Create a new cursor.
5274
@@ -78,6 +100,8 @@ def __init__(self, collection, spec=None, fields=None, skip=0, limit=0,
78100
raise TypeError("await_data must be an instance of bool")
79101
if not isinstance(partial, bool):
80102
raise TypeError("partial must be an instance of bool")
103+
if not isinstance(exhaust, bool):
104+
raise TypeError("exhaust must be an instance of bool")
81105

82106
if fields is not None:
83107
if not fields:
@@ -95,6 +119,15 @@ def __init__(self, collection, spec=None, fields=None, skip=0, limit=0,
95119
self.__limit = limit
96120
self.__batch_size = 0
97121

122+
# Exhaust cursor support
123+
if self.__collection.database.connection.is_mongos and exhaust:
124+
raise InvalidOperation('Exhaust cursors are '
125+
'not supported by mongos')
126+
if limit and exhaust:
127+
raise InvalidOperation("Can't use limit and exhaust together.")
128+
self.__exhaust = exhaust
129+
self.__exhaust_mgr = None
130+
98131
# This is ugly. People want to be able to do cursor[5:5] and
99132
# get an empty result set (old behavior was an
100133
# exception). It's hard to do that right, though, because the
@@ -193,11 +226,19 @@ def __die(self):
193226
"""Closes this cursor.
194227
"""
195228
if self.__id and not self.__killed:
196-
connection = self.__collection.database.connection
197-
if self.__connection_id is not None:
198-
connection.close_cursor(self.__id, self.__connection_id)
229+
if self.__exhaust and self.__exhaust_mgr:
230+
# If this is an exhaust cursor and we haven't completely
231+
# exhausted the result set we *must* close the socket
232+
# to stop the server from sending more data.
233+
self.__exhaust_mgr.sock.close()
199234
else:
200-
connection.close_cursor(self.__id)
235+
connection = self.__collection.database.connection
236+
if self.__connection_id is not None:
237+
connection.close_cursor(self.__id, self.__connection_id)
238+
else:
239+
connection.close_cursor(self.__id)
240+
if self.__exhaust and self.__exhaust_mgr:
241+
self.__exhaust_mgr.close()
201242
self.__killed = True
202243

203244
def close(self):
@@ -299,6 +340,8 @@ def __query_options(self):
299340
options |= _QUERY_OPTIONS["no_timeout"]
300341
if self.__await_data:
301342
options |= _QUERY_OPTIONS["await_data"]
343+
if self.__exhaust:
344+
options |= _QUERY_OPTIONS["exhaust"]
302345
if self.__partial:
303346
options |= _QUERY_OPTIONS["partial"]
304347
return options
@@ -319,6 +362,14 @@ def add_option(self, mask):
319362
raise TypeError("mask must be an int")
320363
self.__check_okay_to_chain()
321364

365+
if mask & _QUERY_OPTIONS["exhaust"]:
366+
if self.__limit:
367+
raise InvalidOperation("Can't use limit and exhaust together.")
368+
if self.__collection.database.connection.is_mongos:
369+
raise InvalidOperation('Exhaust cursors are '
370+
'not supported by mongos')
371+
self.__exhaust = True
372+
322373
self.__query_flags |= mask
323374
return self
324375

@@ -332,6 +383,9 @@ def remove_option(self, mask):
332383
raise TypeError("mask must be an int")
333384
self.__check_okay_to_chain()
334385

386+
if mask & _QUERY_OPTIONS["exhaust"]:
387+
self.__exhaust = False
388+
335389
self.__query_flags &= ~mask
336390
return self
337391

@@ -350,6 +404,8 @@ def limit(self, limit):
350404
"""
351405
if not isinstance(limit, int):
352406
raise TypeError("limit must be an int")
407+
if self.__exhaust:
408+
raise InvalidOperation("Can't use limit and exhaust together.")
353409
self.__check_okay_to_chain()
354410

355411
self.__empty = False
@@ -689,34 +745,38 @@ def where(self, code):
689745

690746
def __send_message(self, message):
691747
"""Send a query or getmore message and handles the response.
692-
"""
693-
db = self.__collection.database
694-
kwargs = {"_must_use_master": self.__must_use_master}
695-
kwargs["read_preference"] = self.__read_preference
696-
kwargs["tag_sets"] = self.__tag_sets
697-
kwargs["secondary_acceptable_latency_ms"] = (
698-
self.__secondary_acceptable_latency_ms)
699-
if self.__connection_id is not None:
700-
kwargs["_connection_to_use"] = self.__connection_id
701-
kwargs.update(self.__kwargs)
702-
703-
try:
704-
response = db.connection._send_message_with_response(message,
705-
**kwargs)
706-
except AutoReconnect:
707-
# Don't try to send kill cursors on another socket
708-
# or to another server. It can cause a _pinValue
709-
# assertion on some server releases if we get here
710-
# due to a socket timeout.
711-
self.__killed = True
712-
raise
713-
714-
if isinstance(response, tuple):
715-
(connection_id, response) = response
716-
else:
717-
connection_id = None
718748
719-
self.__connection_id = connection_id
749+
If message is ``None`` this is an exhaust cursor, which reads
750+
the next result batch off the exhaust socket instead of
751+
sending getMore messages to the server.
752+
"""
753+
client = self.__collection.database.connection
754+
755+
if message:
756+
kwargs = {"_must_use_master": self.__must_use_master}
757+
kwargs["read_preference"] = self.__read_preference
758+
kwargs["tag_sets"] = self.__tag_sets
759+
kwargs["secondary_acceptable_latency_ms"] = (
760+
self.__secondary_acceptable_latency_ms)
761+
kwargs['exhaust'] = self.__exhaust
762+
if self.__connection_id is not None:
763+
kwargs["_connection_to_use"] = self.__connection_id
764+
kwargs.update(self.__kwargs)
765+
766+
try:
767+
res = client._send_message_with_response(message, **kwargs)
768+
self.__connection_id, (response, sock, pool) = res
769+
if self.__exhaust:
770+
self.__exhaust_mgr = _SocketManager(sock, pool)
771+
except AutoReconnect:
772+
# Don't try to send kill cursors on another socket
773+
# or to another server. It can cause a _pinValue
774+
# assertion on some server releases if we get here
775+
# due to a socket timeout.
776+
self.__killed = True
777+
raise
778+
else: # exhaust cursor - no getMore message
779+
response = client._exhaust_next(self.__exhaust_mgr.sock)
720780

721781
try:
722782
response = helpers._unpack_response(response, self.__id,
@@ -727,7 +787,7 @@ def __send_message(self, message):
727787
# Don't send kill cursors to another server after a "not master"
728788
# error. It's completely pointless.
729789
self.__killed = True
730-
db.connection.disconnect()
790+
client.disconnect()
731791
raise
732792
self.__id = response["cursor_id"]
733793

@@ -743,6 +803,11 @@ def __send_message(self, message):
743803
if self.__limit and self.__id and self.__limit <= self.__retrieved:
744804
self.__die()
745805

806+
# Don't wait for garbage collection to call __del__, return the
807+
# socket to the pool now.
808+
if self.__exhaust and self.__id == 0:
809+
self.__exhaust_mgr.close()
810+
746811
def _refresh(self):
747812
"""Refreshes the cursor with more data from Mongo.
748813
@@ -776,9 +841,14 @@ def _refresh(self):
776841
else:
777842
limit = self.__batch_size
778843

779-
self.__send_message(
780-
message.get_more(self.__collection.full_name,
781-
limit, self.__id))
844+
# Exhaust cursors don't send getMore messages.
845+
if self.__exhaust:
846+
self.__send_message(None)
847+
else:
848+
self.__send_message(
849+
message.get_more(self.__collection.full_name,
850+
limit, self.__id))
851+
782852
else: # Cursor id is zero nothing else to return
783853
self.__killed = True
784854

pymongo/master_slave_connection.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -178,28 +178,29 @@ def _send_message_with_response(self, message, _connection_to_use=None,
178178
"""
179179
if _connection_to_use is not None:
180180
if _connection_to_use == -1:
181-
return (-1,
182-
self.__master._send_message_with_response(message,
183-
**kwargs))
181+
member = self.__master
182+
conn = -1
184183
else:
185-
return (_connection_to_use,
186-
self.__slaves[_connection_to_use]
187-
._send_message_with_response(message, **kwargs))
184+
member = self.__slaves[_connection_to_use]
185+
conn = _connection_to_use
186+
return (conn,
187+
member._send_message_with_response(message, **kwargs)[1])
188188

189189
# _must_use_master is set for commands, which must be sent to the
190190
# master instance. any queries in a request must be sent to the
191191
# master since that is where writes go.
192192
if _must_use_master or self.in_request():
193193
return (-1, self.__master._send_message_with_response(message,
194-
**kwargs))
194+
**kwargs)[1])
195195

196196
# Iterate through the slaves randomly until we have success. Raise
197197
# reconnect if they all fail.
198198
for connection_id in helpers.shuffled(xrange(len(self.__slaves))):
199199
try:
200200
slave = self.__slaves[connection_id]
201201
return (connection_id,
202-
slave._send_message_with_response(message, **kwargs))
202+
slave._send_message_with_response(message,
203+
**kwargs)[1])
203204
except AutoReconnect:
204205
pass
205206

pymongo/mongo_client.py

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -975,16 +975,18 @@ def __receive_data_on_socket(self, length, sock_info):
975975
message += chunk
976976
return message
977977

978-
def __receive_message_on_socket(self, operation, request_id, sock_info):
979-
"""Receive a message in response to `request_id` on `sock`.
978+
def __receive_message_on_socket(self, operation, rqst_id, sock_info):
979+
"""Receive a message in response to `rqst_id` on `sock`.
980980
981981
Returns the response data with the header removed.
982982
"""
983983
header = self.__receive_data_on_socket(16, sock_info)
984984
length = struct.unpack("<i", header[:4])[0]
985-
msg_req_id = struct.unpack("<i", header[8:12])[0]
986-
assert request_id == msg_req_id, \
987-
"ids don't match %r %r" % (request_id, msg_req_id)
985+
# No rqst_id for exhaust cursor "getMore".
986+
if rqst_id is not None:
987+
resp_id = struct.unpack("<i", header[8:12])[0]
988+
assert rqst_id == resp_id, "ids don't match %r %r" % (rqst_id,
989+
resp_id)
988990
assert operation == struct.unpack("<i", header[12:])[0]
989991

990992
return self.__receive_data_on_socket(length - 16, sock_info)
@@ -1012,28 +1014,30 @@ def _send_message_with_response(self, message,
10121014
- `message`: (request_id, data) pair making up the message to send
10131015
"""
10141016
sock_info = self.__socket()
1015-
1017+
exhaust = kwargs.get('exhaust')
10161018
try:
10171019
try:
1018-
if "network_timeout" in kwargs:
1020+
if not exhaust and "network_timeout" in kwargs:
10191021
sock_info.sock.settimeout(kwargs["network_timeout"])
1020-
return self.__send_and_receive(message, sock_info)
1022+
response = self.__send_and_receive(message, sock_info)
1023+
1024+
if not exhaust:
1025+
if "network_timeout" in kwargs:
1026+
sock_info.sock.settimeout(self.__net_timeout)
1027+
1028+
return (None, (response, sock_info, self.__pool))
10211029
except (ConnectionFailure, socket.error), e:
10221030
self.disconnect()
10231031
raise AutoReconnect(str(e))
10241032
finally:
1025-
if "network_timeout" in kwargs:
1026-
try:
1027-
# Restore the socket's original timeout and return it to
1028-
# the pool
1029-
sock_info.sock.settimeout(self.__net_timeout)
1030-
self.__pool.maybe_return_socket(sock_info)
1031-
except socket.error:
1032-
# There was an exception and we've closed the socket
1033-
pass
1034-
else:
1033+
if not exhaust:
10351034
self.__pool.maybe_return_socket(sock_info)
10361035

1036+
def _exhaust_next(self, sock_info):
    """Read the next result batch for an exhaust cursor.

    Nothing is sent to the server; the next message is simply read off
    `sock_info`. ``None`` is passed as the request id because there is
    no request to match the response against, and ``1`` is the operation
    code the message header is expected to carry (OP_REPLY in the
    MongoDB wire protocol).

    Returns the response data with the header removed.
    """
    batch = self.__receive_message_on_socket(1, None, sock_info)
    return batch
1040+
10371041
def start_request(self):
10381042
"""Ensure the current thread or greenlet always uses the same socket
10391043
until it calls :meth:`end_request`. This ensures consistent reads,

0 commit comments

Comments
 (0)