Skip to content

Commit ee9ecd0

Browse files
committed
Introduce DocumentTooLarge exception PYTHON-630
The idea here is to unify the handling of oversize documents when using the bulk API in MongoDB 2.6 and earlier versions. As a result, using bulk Collection.insert against legacy servers will attempt to insert all documents preceding the oversize document before raising.
1 parent af08da3 commit ee9ecd0

File tree

8 files changed

+167
-104
lines changed

8 files changed

+167
-104
lines changed

pymongo/_cmessagemodule.c

Lines changed: 61 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -542,27 +542,48 @@ static PyObject* _cbson_get_more_message(PyObject* self, PyObject* args) {
542542

543543
static void
544544
_set_document_too_large(int size, long max) {
545-
PyObject* InvalidDocument = _error("InvalidDocument");
546-
if (InvalidDocument) {
545+
PyObject* DocumentTooLarge = _error("DocumentTooLarge");
546+
if (DocumentTooLarge) {
547547
#if PY_MAJOR_VERSION >= 3
548548
PyObject* error = PyUnicode_FromFormat(DOC_TOO_LARGE_FMT, size, max);
549549
#else
550550
PyObject* error = PyString_FromFormat(DOC_TOO_LARGE_FMT, size, max);
551551
#endif
552552
if (error) {
553-
PyErr_SetObject(InvalidDocument, error);
553+
PyErr_SetObject(DocumentTooLarge, error);
554554
Py_DECREF(error);
555555
}
556-
Py_DECREF(InvalidDocument);
556+
Py_DECREF(DocumentTooLarge);
557557
}
558558
}
559559

560+
static PyObject*
561+
_send_insert(PyObject* self, PyObject* client,
562+
PyObject* gle_args, buffer_t buffer,
563+
char* coll_name, int coll_len, int request_id, int safe) {
564+
565+
PyObject* result;
566+
if (safe) {
567+
if (!add_last_error(self, buffer, request_id,
568+
coll_name, coll_len, gle_args)) {
569+
return NULL;
570+
}
571+
}
572+
573+
result = Py_BuildValue("i" BYTES_FORMAT_STRING, request_id,
574+
buffer_get_buffer(buffer),
575+
buffer_get_position(buffer));
576+
577+
return PyObject_CallMethod(client, "_send_message", "NN",
578+
result, PyBool_FromLong((long)safe));
579+
}
580+
560581
static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) {
561582
struct module_state *state = GETSTATE(self);
562583

563584
/* NOTE just using a random number as the request_id */
564585
int request_id = rand();
565-
int options = 0;
586+
int send_safe, options = 0;
566587
int length_location, message_length;
567588
int collection_name_length;
568589
char* collection_name = NULL;
@@ -574,7 +595,6 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) {
574595
PyObject* result;
575596
PyObject* max_bson_size_obj;
576597
PyObject* max_message_size_obj;
577-
PyObject* send_message_result;
578598
unsigned char check_keys;
579599
unsigned char safe;
580600
unsigned char continue_on_error;
@@ -598,6 +618,11 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) {
598618
if (continue_on_error) {
599619
options += 1;
600620
}
621+
/*
622+
* If we are doing unacknowledged writes *and* continue_on_error
623+
* is True it's pointless (and slower) to send GLE.
624+
*/
625+
send_safe = (safe || !continue_on_error);
601626

602627
max_bson_size_obj = PyObject_GetAttrString(client, "max_bson_size");
603628
#if PY_MAJOR_VERSION >= 3
@@ -651,7 +676,6 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) {
651676
while ((doc = PyIter_Next(iterator)) != NULL) {
652677
int before = buffer_get_position(buffer);
653678
int cur_size;
654-
empty = 0;
655679
if (!write_dict(state->_cbson, buffer, doc, check_keys, uuid_subtype, 1)) {
656680
Py_DECREF(doc);
657681
goto iterfail;
@@ -660,15 +684,28 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) {
660684

661685
cur_size = buffer_get_position(buffer) - before;
662686
if (cur_size > max_bson_size) {
687+
/* If we've encoded anything send it before raising. */
688+
if (!empty) {
689+
buffer_update_position(buffer, before);
690+
message_length = buffer_get_position(buffer) - length_location;
691+
memcpy(buffer_get_buffer(buffer) + length_location,
692+
&message_length, 4);
693+
result = _send_insert(self, client, last_error_args, buffer,
694+
collection_name, collection_name_length,
695+
request_id, send_safe);
696+
if (!result)
697+
goto iterfail;
698+
Py_DECREF(result);
699+
}
663700
_set_document_too_large(cur_size, max_bson_size);
664701
goto iterfail;
665702
}
703+
empty = 0;
666704

667705
/* We have enough data, send this batch. */
668706
if (buffer_get_position(buffer) > max_message_size) {
669707
int new_request_id = rand();
670708
int message_start;
671-
PyObject* send_gle = Py_False;
672709
buffer_t new_buffer = buffer_new();
673710
if (!new_buffer) {
674711
PyErr_NoMemory();
@@ -696,29 +733,16 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) {
696733
message_length = buffer_get_position(buffer) - length_location;
697734
memcpy(buffer_get_buffer(buffer) + length_location, &message_length, 4);
698735

699-
/* If we are doing unacknowledged writes *and* continue_on_error
700-
* is True it's pointless (and slower) to send GLE. */
701-
if (safe || !continue_on_error) {
702-
send_gle = Py_True;
703-
if (!add_last_error(self, buffer, request_id, collection_name,
704-
collection_name_length, last_error_args)) {
705-
buffer_free(new_buffer);
706-
goto iterfail;
707-
}
708-
}
709-
/* Objectify buffer */
710-
result = Py_BuildValue("i" BYTES_FORMAT_STRING, request_id,
711-
buffer_get_buffer(buffer),
712-
buffer_get_position(buffer));
736+
result = _send_insert(self, client, last_error_args, buffer,
737+
collection_name, collection_name_length,
738+
request_id, send_safe);
739+
713740
buffer_free(buffer);
714741
buffer = new_buffer;
715742
request_id = new_request_id;
716743
length_location = message_start;
717744

718-
send_message_result = PyObject_CallMethod(client, "_send_message",
719-
"NO", result, send_gle);
720-
721-
if (!send_message_result) {
745+
if (!result) {
722746
PyObject *etype = NULL, *evalue = NULL, *etrace = NULL;
723747
PyObject* OperationFailure;
724748
PyErr_Fetch(&etype, &evalue, &etrace);
@@ -757,7 +781,7 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) {
757781
PyErr_Restore(etype, evalue, etrace);
758782
goto iterfail;
759783
} else {
760-
Py_DECREF(send_message_result);
784+
Py_DECREF(result);
761785
}
762786
}
763787
}
@@ -779,33 +803,21 @@ static PyObject* _cbson_do_batched_insert(PyObject* self, PyObject* args) {
779803
message_length = buffer_get_position(buffer) - length_location;
780804
memcpy(buffer_get_buffer(buffer) + length_location, &message_length, 4);
781805

782-
if (safe) {
783-
if (!add_last_error(self, buffer, request_id, collection_name,
784-
collection_name_length, last_error_args)) {
785-
goto insertfail;
786-
}
787-
}
806+
/* Send the last (or only) batch */
807+
result = _send_insert(self, client, last_error_args, buffer,
808+
collection_name, collection_name_length,
809+
request_id, safe);
788810

789811
PyMem_Free(collection_name);
790-
791-
/* objectify buffer */
792-
result = Py_BuildValue("i" BYTES_FORMAT_STRING, request_id,
793-
buffer_get_buffer(buffer),
794-
buffer_get_position(buffer));
795812
buffer_free(buffer);
796813

797-
/* Send the last (or only) batch */
798-
send_message_result = PyObject_CallMethod(client, "_send_message", "NN",
799-
result,
800-
PyBool_FromLong((long)safe));
801-
802-
if (!send_message_result) {
814+
if (!result) {
803815
Py_XDECREF(exc_type);
804816
Py_XDECREF(exc_value);
805817
Py_XDECREF(exc_trace);
806818
return NULL;
807819
} else {
808-
Py_DECREF(send_message_result);
820+
Py_DECREF(result);
809821
}
810822

811823
if (exc_type) {
@@ -1050,15 +1062,15 @@ _cbson_do_batched_write_command(PyObject* self, PyObject* args) {
10501062
if (op == _INSERT) {
10511063
_set_document_too_large(cur_size, max_bson_size);
10521064
} else {
1053-
PyObject* InvalidDocument = _error("InvalidDocument");
1054-
if (InvalidDocument) {
1065+
PyObject* DocumentTooLarge = _error("DocumentTooLarge");
1066+
if (DocumentTooLarge) {
10551067
/*
10561068
* There's nothing intelligent we can say
10571069
* about size for update and remove.
10581070
*/
1059-
PyErr_SetString(InvalidDocument,
1071+
PyErr_SetString(DocumentTooLarge,
10601072
"command document too large");
1061-
Py_DECREF(InvalidDocument);
1073+
Py_DECREF(DocumentTooLarge);
10621074
}
10631075
}
10641076
goto cmditerfail;

pymongo/bulk.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from bson.objectid import ObjectId
1818
from bson.son import SON
1919
from pymongo.errors import (BulkWriteError,
20+
DocumentTooLarge,
2021
InvalidOperation,
2122
OperationFailure)
2223
from pymongo.message import (_INSERT, _UPDATE, _DELETE,
@@ -26,6 +27,8 @@
2627
_DELETE_ALL = 0
2728
_DELETE_ONE = 1
2829

30+
# For backwards compatibility. See MongoDB src/mongo/base/error_codes.err
31+
_BAD_VALUE = 2
2932
_UNKNOWN_ERROR = 8
3033
_WRITE_CONCERN_ERROR = 64
3134

@@ -84,7 +87,7 @@ def _merge_legacy(run, full_result, result, index):
8487
# will fail.
8588
note = result.get("jnote", result.get("wnote"))
8689
if note:
87-
raise OperationFailure(note, 2, result)
90+
raise OperationFailure(note, _BAD_VALUE, result)
8891

8992
affected = result.get('n', 0)
9093

@@ -368,6 +371,14 @@ def execute_legacy(self, generator, write_concern):
368371
multi=(not operation['limit']),
369372
**write_concern)
370373
_merge_legacy(run, full_result, result, idx)
374+
except DocumentTooLarge, exc:
375+
# MongoDB 2.6 uses error code 2 for "too large".
376+
error = _make_error(
377+
run.index(idx), _BAD_VALUE, str(exc), operation)
378+
full_result['writeErrors'].append(error)
379+
if self.ordered:
380+
stop = True
381+
break
371382
except OperationFailure, exc:
372383
if not exc.details:
373384
# Some error not related to the write operation

pymongo/errors.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,3 +172,8 @@ class ExceededMaxWaiters(Exception):
172172
"""
173173
pass
174174

175+
176+
class DocumentTooLarge(InvalidDocument):
177+
"""Raised when an encoded document is too large for the connected server.
178+
"""
179+
pass

pymongo/message.py

Lines changed: 38 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
_use_c = True
3535
except ImportError:
3636
_use_c = False
37-
from pymongo.errors import InvalidDocument, InvalidOperation, OperationFailure
37+
from pymongo.errors import DocumentTooLarge, InvalidOperation, OperationFailure
3838

3939

4040
MAX_INT32 = 2147483647
@@ -217,45 +217,50 @@ def _insert_message(insert_message, send_safe):
217217
final_message += error_message
218218
return request_id, final_message
219219

220+
send_safe = safe or not continue_on_error
220221
last_error = None
221222
begin = struct.pack("<i", int(continue_on_error))
222223
begin += bson._make_c_string(collection_name)
223224
message_length = len(begin)
224225
data = [begin]
225226
has_docs = False
226227
for doc in docs:
227-
has_docs = True
228228
encoded = bson.BSON.encode(doc, check_keys, uuid_subtype)
229229
encoded_length = len(encoded)
230-
if encoded_length > client.max_bson_size:
231-
raise InvalidDocument("BSON document too large (%d bytes)"
232-
" - the connected server supports"
233-
" BSON document sizes up to %d"
234-
" bytes." %
235-
(encoded_length, client.max_bson_size))
230+
too_large = (encoded_length > client.max_bson_size)
231+
236232
message_length += encoded_length
237-
if message_length < client.max_message_size:
233+
if message_length < client.max_message_size and not too_large:
238234
data.append(encoded)
235+
has_docs = True
239236
continue
240237

241-
# We have enough data, send this message.
242-
send_safe = safe or not continue_on_error
243-
try:
244-
client._send_message(_insert_message(_EMPTY.join(data),
245-
send_safe), send_safe)
246-
# Exception type could be OperationFailure or a subtype
247-
# (e.g. DuplicateKeyError)
248-
except OperationFailure, exc:
249-
# Like it says, continue on error...
250-
if continue_on_error:
251-
# Store exception details to re-raise after the final batch.
252-
last_error = exc
253-
# With unacknowledged writes just return at the first error.
254-
elif not safe:
255-
return
256-
# With acknowledged writes raise immediately.
257-
else:
258-
raise
238+
if has_docs:
239+
# We have enough data, send this message.
240+
try:
241+
client._send_message(_insert_message(_EMPTY.join(data),
242+
send_safe), send_safe)
243+
# Exception type could be OperationFailure or a subtype
244+
# (e.g. DuplicateKeyError)
245+
except OperationFailure, exc:
246+
# Like it says, continue on error...
247+
if continue_on_error:
248+
# Store exception details to re-raise after the final batch.
249+
last_error = exc
250+
# With unacknowledged writes just return at the first error.
251+
elif not safe:
252+
return
253+
# With acknowledged writes raise immediately.
254+
else:
255+
raise
256+
257+
if too_large:
258+
raise DocumentTooLarge("BSON document too large (%d bytes)"
259+
" - the connected server supports"
260+
" BSON document sizes up to %d"
261+
" bytes." %
262+
(encoded_length, client.max_bson_size))
263+
259264
message_length = len(begin) + encoded_length
260265
data = [begin, encoded]
261266

@@ -352,14 +357,14 @@ def send_message():
352357
if (buf.tell() + len(key) + len(value) + 2) >= max_cmd_size:
353358
if not idx:
354359
if operation == _INSERT:
355-
raise InvalidDocument("BSON document too large (%d bytes)"
356-
" - the connected server supports"
357-
" BSON document sizes up to %d"
358-
" bytes." % (len(value),
359-
max_bson_size))
360+
raise DocumentTooLarge("BSON document too large (%d bytes)"
361+
" - the connected server supports"
362+
" BSON document sizes up to %d"
363+
" bytes." % (len(value),
364+
max_bson_size))
360365
# There's nothing intelligent we can say
361366
# about size for update and remove
362-
raise InvalidDocument("command document too large")
367+
raise DocumentTooLarge("command document too large")
363368
result = send_message()
364369
results.append((idx_offset, result))
365370
if ordered and "writeErrors" in result:

pymongo/mongo_client.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@
5656
from pymongo.errors import (AutoReconnect,
5757
ConfigurationError,
5858
ConnectionFailure,
59+
DocumentTooLarge,
5960
DuplicateKeyError,
60-
InvalidDocument,
6161
InvalidURI,
6262
OperationFailure)
6363
from pymongo.member import Member
@@ -1049,11 +1049,11 @@ def __check_bson_size(self, message):
10491049
if len(message) == 3:
10501050
(request_id, data, max_doc_size) = message
10511051
if max_doc_size > self.max_bson_size:
1052-
raise InvalidDocument("BSON document too large (%d bytes)"
1053-
" - the connected server supports"
1054-
" BSON document sizes up to %d"
1055-
" bytes." %
1056-
(max_doc_size, self.max_bson_size))
1052+
raise DocumentTooLarge("BSON document too large (%d bytes)"
1053+
" - the connected server supports"
1054+
" BSON document sizes up to %d"
1055+
" bytes." %
1056+
(max_doc_size, self.max_bson_size))
10571057
return (request_id, data)
10581058
else:
10591059
# get_more and kill_cursors messages

0 commit comments

Comments (0)