Skip to content

Commit 237754d

Browse files
committed
PYTHON-310: Added find() method to gridfs
Allows iteration through arbitrary queries against the files collection using a new GridOutCursor class.
1 parent 4142442 commit 237754d

File tree

6 files changed

+175
-10
lines changed

6 files changed

+175
-10
lines changed

doc/api/gridfs/grid_file.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,6 @@
1717

1818
.. autoclass:: GridFile
1919
:members:
20+
21+
.. autoclass:: GridOutCursor
22+
:members:

gridfs/__init__.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
from gridfs.errors import (NoFile,
2424
UnsupportedAPI)
2525
from gridfs.grid_file import (GridIn,
26-
GridOut)
26+
GridOut,
27+
GridOutCursor)
2728
from pymongo import (MongoClient,
2829
ASCENDING,
2930
DESCENDING)
@@ -278,6 +279,72 @@ def list(self):
278279
name for name in self.__files.distinct("filename")
279280
if name is not None]
280281

282+
def find(self, *args, **kwargs):
283+
"""Query GridFS for files.
284+
285+
Returns a cursor that iterates across files matching
286+
arbitrary queries on the files collection. Can be combined
287+
with other modifiers for additional control. For example:
288+
289+
>>> for grid_out in fs.find({"filename": "lisa.txt"}, timeout=False):
290+
...     data = grid_out.read()
291+
292+
would iterate through all versions of "lisa.txt" stored in GridFS.
293+
Note that setting timeout to False may be important to prevent the
294+
cursor from timing out during long multi-file processing work.
295+
296+
As another example, the call
297+
298+
>>> most_recent_three = fs.find().sort("uploadDate", -1).limit(3)
299+
300+
would return a cursor to the three most recently uploaded files
301+
in GridFS.
302+
303+
Follows a similar interface to
304+
:meth:`~pymongo.collection.Collection.find`
305+
in :class:`~pymongo.collection.Collection`.
306+
307+
:Parameters:
308+
- `spec` (optional): a SON object specifying elements which
309+
must be present for a document to be included in the
310+
result set
311+
- `skip` (optional): the number of files to omit (from
312+
the start of the result set) when returning the results
313+
- `limit` (optional): the maximum number of results to
314+
return
315+
- `timeout` (optional): if True (the default), any returned
316+
cursor is closed by the server after 10 minutes of
317+
inactivity. If set to False, the returned cursor will never
318+
time out on the server. Care should be taken to ensure that
319+
cursors with timeout turned off are properly closed.
320+
- `sort` (optional): a list of (key, direction) pairs
321+
specifying the sort order for this query. See
322+
:meth:`~pymongo.cursor.Cursor.sort` for details.
323+
- `max_scan` (optional): limit the number of file documents
324+
examined when performing the query
325+
- `read_preference` (optional): The read preference for
326+
this query.
327+
- `tag_sets` (optional): The tag sets for this query.
328+
- `secondary_acceptable_latency_ms` (optional): Any replica-set
329+
member whose ping time is within secondary_acceptable_latency_ms of
330+
the nearest member may accept reads. Default 15 milliseconds.
331+
**Ignored by mongos** and must be configured on the command line.
332+
See the localThreshold_ option for more information.
333+
- `compile_re` (optional): if ``False``, don't attempt to compile
334+
BSON regex objects into Python regexes. Return instances of
335+
:class:`~bson.regex.Regex` instead.
336+
337+
Raises :class:`TypeError` if any of the arguments are of
338+
improper type. Returns an instance of
339+
:class:`~gridfs.grid_file.GridOutCursor`
340+
corresponding to this query.
341+
342+
.. versionadded:: 2.7
343+
.. mongodoc:: find
344+
.. _localThreshold: http://docs.mongodb.org/manual/reference/mongos/#cmdoption-mongos--localThreshold
345+
"""
346+
return GridOutCursor(self.__collection, *args, **kwargs)
347+
281348
def exists(self, document_or_id=None, **kwargs):
282349
"""Check if a file exists in this instance of :class:`GridFS`.
283350

gridfs/grid_file.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
UnsupportedAPI)
2828
from pymongo import ASCENDING
2929
from pymongo.collection import Collection
30+
from pymongo.cursor import Cursor
3031
from pymongo.errors import DuplicateKeyError
3132

3233
try:
@@ -614,3 +615,54 @@ class GridFile(object):
614615
def __init__(self, *args, **kwargs):
615616
raise UnsupportedAPI("The GridFile class is no longer supported. "
616617
"Please use GridIn or GridOut instead.")
618+
619+
620+
class GridOutCursor(Cursor):
621+
"""A cursor / iterator for returning GridOut objects as the result
622+
of an arbitrary query against the GridFS files collection.
623+
"""
624+
def __init__(self, collection, spec=None, skip=0, limit=0,
625+
timeout=True, sort=None, max_scan=None,
626+
read_preference=None, tag_sets=None,
627+
secondary_acceptable_latency_ms=None, compile_re=True):
628+
"""Create a new cursor, similar to the normal
629+
:class:`~pymongo.cursor.Cursor`.
630+
631+
Should not be called directly by application developers - see
632+
the :class:`~gridfs.GridFS` method :meth:`~gridfs.GridFS.find` instead.
633+
634+
.. versionadded:: 2.7
635+
636+
.. mongodoc:: cursors
637+
"""
638+
# Hold on to the base "fs" collection to create GridOut objects later.
639+
self.__root_collection = collection
640+
641+
# Copy these settings from collection if they are not set by caller.
642+
read_preference = read_preference or collection.files.read_preference
643+
tag_sets = tag_sets or collection.files.tag_sets
644+
latency = (secondary_acceptable_latency_ms
645+
or collection.files.secondary_acceptable_latency_ms)
646+
647+
super(GridOutCursor, self).__init__(
648+
collection.files, spec, skip=skip, limit=limit, timeout=timeout,
649+
sort=sort, max_scan=max_scan, read_preference=read_preference,
650+
secondary_acceptable_latency_ms=latency, compile_re=compile_re,
651+
tag_sets=tag_sets)
652+
653+
def next(self):
654+
"""Get next GridOut object from cursor.
655+
"""
656+
next_file = super(GridOutCursor, self).next()
657+
return GridOut(self.__root_collection, file_document=next_file)
658+
659+
def add_option(self, *args, **kwargs):
660+
raise NotImplementedError("Method does not exist for GridOutCursor")
661+
662+
def remove_option(self, *args, **kwargs):
663+
raise NotImplementedError("Method does not exist for GridOutCursor")
664+
665+
def _clone_base(self):
666+
"""Creates an empty GridOutCursor for information to be copied into.
667+
"""
668+
return GridOutCursor(self.__root_collection)

pymongo/cursor.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -219,11 +219,11 @@ def clone(self):
219219
unevaluated, even if the current instance has been partially or
220220
completely evaluated.
221221
"""
222-
return self.__clone(True)
222+
return self._clone(True)
223223

224-
def __clone(self, deepcopy=True):
224+
def _clone(self, deepcopy=True):
225225
self.__check_not_command_cursor('clone')
226-
clone = Cursor(self.__collection)
226+
clone = self._clone_base()
227227
values_to_clone = ("spec", "fields", "skip", "limit", "max_time_ms",
228228
"comment", "max", "min",
229229
"snapshot", "ordering", "explain", "hint",
@@ -235,10 +235,15 @@ def __clone(self, deepcopy=True):
235235
data = dict((k, v) for k, v in self.__dict__.iteritems()
236236
if k.startswith('_Cursor__') and k[9:] in values_to_clone)
237237
if deepcopy:
238-
data = self.__deepcopy(data)
238+
data = self._deepcopy(data)
239239
clone.__dict__.update(data)
240240
return clone
241241

242+
def _clone_base(self):
243+
"""Creates an empty Cursor object for information to be copied into.
244+
"""
245+
return Cursor(self.__collection)
246+
242247
def __die(self):
243248
"""Closes this cursor.
244249
"""
@@ -1017,16 +1022,16 @@ def __copy__(self):
10171022
10181023
.. versionadded:: 2.4
10191024
"""
1020-
return self.__clone(deepcopy=False)
1025+
return self._clone(deepcopy=False)
10211026

10221027
def __deepcopy__(self, memo):
10231028
"""Support function for `copy.deepcopy()`.
10241029
10251030
.. versionadded:: 2.4
10261031
"""
1027-
return self.__clone(deepcopy=True)
1032+
return self._clone(deepcopy=True)
10281033

1029-
def __deepcopy(self, x, memo=None):
1034+
def _deepcopy(self, x, memo=None):
10301035
"""Deepcopy helper for the data dictionary or list.
10311036
10321037
Regular expressions cannot be deep copied but as they are immutable we
@@ -1046,7 +1051,7 @@ def __deepcopy(self, x, memo=None):
10461051

10471052
for key, value in iterator:
10481053
if isinstance(value, (dict, list)) and not isinstance(value, SON):
1049-
value = self.__deepcopy(value, memo)
1054+
value = self._deepcopy(value, memo)
10501055
elif not isinstance(value, RE_TYPE):
10511056
value = copy.deepcopy(value, memo)
10521057

test/test_grid_file.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
_SEEK_END,
3333
GridIn,
3434
GridFile,
35-
GridOut)
35+
GridOut,
36+
GridOutCursor)
3637
from gridfs.errors import (NoFile,
3738
UnsupportedAPI)
3839
from pymongo import MongoClient
@@ -589,6 +590,27 @@ def test_grid_in_lazy_connect(self):
589590
self.assertRaises(ConnectionFailure, infile.write, b('data goes here'))
590591
self.assertRaises(ConnectionFailure, infile.close)
591592

593+
def test_grid_out_cursor_options(self):
594+
self.assertRaises(TypeError, GridOutCursor.__init__, self.db.fs, {},
595+
tailable=True)
596+
self.assertRaises(TypeError, GridOutCursor.__init__, self.db.fs, {},
597+
fields={"filename":1})
598+
599+
cursor = GridOutCursor(self.db.fs, {})
600+
min_ms = self.db.fs.files.secondary_acceptable_latency_ms
601+
new_ms = cursor._Cursor__secondary_acceptable_latency_ms
602+
self.assertEqual(min_ms, new_ms)
603+
cursor = GridOutCursor(self.db.fs, {},
604+
secondary_acceptable_latency_ms=100)
605+
min_ms = self.db.fs.files.secondary_acceptable_latency_ms
606+
new_ms = cursor._Cursor__secondary_acceptable_latency_ms
607+
self.assertNotEqual(min_ms, new_ms)
608+
cursor_clone = cursor.clone()
609+
self.assertEqual(cursor_clone.__dict__, cursor.__dict__)
610+
611+
self.assertRaises(NotImplementedError, cursor.add_option, 0)
612+
self.assertRaises(NotImplementedError, cursor.remove_option, 0)
613+
592614

593615
if __name__ == "__main__":
594616
unittest.main()

test/test_gridfs.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -373,6 +373,22 @@ def test_gridfs_lazy_connect(self):
373373
f = fs.new_file() # Still no connection.
374374
self.assertRaises(ConnectionFailure, f.close)
375375

376+
def test_gridfs_find(self):
377+
self.fs.put(b("test2"), filename="two")
378+
self.fs.put(b("test2+"), filename="two")
379+
self.fs.put(b("test1"), filename="one")
380+
self.fs.put(b("test2++"), filename="two")
381+
self.assertEqual(3, self.fs.find({"filename":"two"}).count())
382+
self.assertEqual(4, self.fs.find().count())
383+
cursor = self.fs.find(timeout=False).sort("uploadDate", -1).skip(1).limit(2)
384+
self.assertEqual(b("test1"), cursor.next().read())
385+
cursor.rewind()
386+
self.assertEqual(b("test1"), cursor.next().read())
387+
self.assertEqual(b("test2+"), cursor.next().read())
388+
self.assertRaises(StopIteration, cursor.next)
389+
cursor.close()
390+
self.assertRaises(TypeError, self.fs.find, {}, {"_id": True})
391+
376392

377393
class TestGridfsReplicaSet(TestReplicaSetClientBase):
378394
def test_gridfs_replica_set(self):

0 commit comments

Comments
 (0)