Skip to content

Commit 3c85a98

Browse files
committed
Support per-collection UUID subtype PYTHON-267
1 parent 2bba32b commit 3c85a98

11 files changed

Lines changed: 217 additions & 83 deletions

File tree

bson/__init__.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import struct
2222
import warnings
2323

24-
from bson.binary import Binary
24+
from bson.binary import Binary, UUID_SUBTYPE
2525
from bson.code import Code
2626
from bson.dbref import DBRef
2727
from bson.errors import (InvalidBSON,
@@ -276,7 +276,7 @@ def _bson_to_dict(data, as_class, tz_aware):
276276
_bson_to_dict = _cbson._bson_to_dict
277277

278278

279-
def _element_to_bson(key, value, check_keys):
279+
def _element_to_bson(key, value, check_keys, uuid_subtype):
280280
if not isinstance(key, basestring):
281281
raise InvalidDocument("documents must have only string keys, "
282282
"key was %r" % key)
@@ -294,7 +294,7 @@ def _element_to_bson(key, value, check_keys):
294294
# Use Binary w/ the caller-supplied uuid_subtype for UUID instances
295295
if _use_uuid:
296296
if isinstance(value, uuid.UUID):
297-
value = Binary(value.bytes, subtype=4)
297+
value = Binary(value.bytes, subtype=uuid_subtype)
298298

299299
if isinstance(value, Binary):
300300
subtype = value.subtype
@@ -304,7 +304,7 @@ def _element_to_bson(key, value, check_keys):
304304
chr(subtype), value)
305305
if isinstance(value, Code):
306306
cstring = _make_c_string(value)
307-
scope = _dict_to_bson(value.scope, False, False)
307+
scope = _dict_to_bson(value.scope, False, uuid_subtype, False)
308308
full_length = struct.pack("<i", 8 + len(cstring) + len(scope))
309309
length = struct.pack("<i", len(cstring))
310310
return "\x0F" + name + full_length + length + cstring + scope
@@ -317,10 +317,10 @@ def _element_to_bson(key, value, check_keys):
317317
length = struct.pack("<i", len(cstring))
318318
return "\x02" + name + length + cstring
319319
if isinstance(value, dict):
320-
return "\x03" + name + _dict_to_bson(value, check_keys, False)
320+
return "\x03" + name + _dict_to_bson(value, check_keys, uuid_subtype, False)
321321
if isinstance(value, (list, tuple)):
322322
as_dict = SON(zip([str(i) for i in range(len(value))], value))
323-
return "\x04" + name + _dict_to_bson(as_dict, check_keys, False)
323+
return "\x04" + name + _dict_to_bson(as_dict, check_keys, uuid_subtype, False)
324324
if isinstance(value, ObjectId):
325325
return "\x07" + name + value.binary
326326
if value is True:
@@ -369,7 +369,7 @@ def _element_to_bson(key, value, check_keys):
369369
return "\x0B" + name + _make_c_string(pattern, True) + \
370370
_make_c_string(flags)
371371
if isinstance(value, DBRef):
372-
return _element_to_bson(key, value.as_doc(), False)
372+
return _element_to_bson(key, value.as_doc(), False, uuid_subtype)
373373
if isinstance(value, MinKey):
374374
return "\xFF" + name
375375
if isinstance(value, MaxKey):
@@ -379,14 +379,14 @@ def _element_to_bson(key, value, check_keys):
379379
type(value))
380380

381381

382-
def _dict_to_bson(dict, check_keys, top_level=True):
382+
def _dict_to_bson(dict, check_keys, uuid_subtype, top_level=True):
383383
try:
384384
elements = []
385385
if top_level and "_id" in dict:
386-
elements.append(_element_to_bson("_id", dict["_id"], False))
386+
elements.append(_element_to_bson("_id", dict["_id"], False, uuid_subtype))
387387
for (key, value) in dict.iteritems():
388388
if not top_level or key != "_id":
389-
elements.append(_element_to_bson(key, value, check_keys))
389+
elements.append(_element_to_bson(key, value, check_keys, uuid_subtype))
390390
except AttributeError:
391391
raise TypeError("encoder expected a mapping type but got: %r" % dict)
392392

@@ -478,7 +478,7 @@ def from_dict(cls, dct, check_keys=False):
478478
return cls.encode(dct, check_keys)
479479

480480
@classmethod
481-
def encode(cls, document, check_keys=False):
481+
def encode(cls, document, check_keys=False, uuid_subtype=UUID_SUBTYPE):
482482
"""Encode a document to a new :class:`BSON` instance.
483483
484484
A document can be any mapping type (like :class:`dict`).
@@ -497,7 +497,7 @@ def encode(cls, document, check_keys=False):
497497
498498
.. versionadded:: 1.9
499499
"""
500-
return cls(_dict_to_bson(document, check_keys))
500+
return cls(_dict_to_bson(document, check_keys, uuid_subtype))
501501

502502
def to_dict(self, as_class=dict, tz_aware=False):
503503
"""DEPRECATED - `to_dict` has been renamed to `decode`.

bson/_cbson.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,16 @@ typedef unsigned int Py_ssize_t;
3030
int buffer_write_bytes(buffer_t buffer, const char* data, int size);
3131

3232
int write_dict(buffer_t buffer, PyObject* dict,
33-
unsigned char check_keys, unsigned char top_level);
33+
unsigned char check_keys, unsigned char uuid_subtype,
34+
unsigned char top_level);
3435

3536
int write_pair(buffer_t buffer, const char* name, Py_ssize_t name_length,
36-
PyObject* value, unsigned char check_keys, unsigned char allow_id);
37+
PyObject* value, unsigned char check_keys,
38+
unsigned char uuid_subtype, unsigned char allow_id);
3739

3840
int decode_and_write_pair(buffer_t buffer, PyObject* key, PyObject* value,
39-
unsigned char check_keys, unsigned char top_level);
41+
unsigned char check_keys, unsigned char uuid_subtype,
42+
unsigned char top_level);
4043

4144
PyMODINIT_FUNC init_cbson(void);
4245
#endif

bson/_cbsonmodule.c

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,9 @@ static PyTypeObject* REType = NULL;
9191
static PyObject* elements_to_dict(const char* string, int max,
9292
PyObject* as_class, unsigned char tz_aware);
9393

94-
static int _write_element_to_buffer(buffer_t buffer, int type_byte, PyObject* value,
95-
unsigned char check_keys, unsigned char first_attempt);
94+
static int _write_element_to_buffer(buffer_t buffer, int type_byte,
95+
PyObject* value, unsigned char check_keys,
96+
unsigned char uuid_subtype, unsigned char first_attempt);
9697

9798
/* Date stuff */
9899
static PyObject* datetime_from_millis(long long millis) {
@@ -212,12 +213,13 @@ static int _reload_python_objects(void) {
212213

213214
static int write_element_to_buffer(buffer_t buffer, int type_byte,
214215
PyObject* value, unsigned char check_keys,
216+
unsigned char uuid_subtype,
215217
unsigned char first_attempt) {
216218
int result;
217219
if(Py_EnterRecursiveCall(" while encoding an object to BSON "))
218220
return 0;
219221
result = _write_element_to_buffer(buffer, type_byte, value,
220-
check_keys, first_attempt);
222+
check_keys, uuid_subtype, first_attempt);
221223
Py_LeaveRecursiveCall();
222224
return result;
223225
}
@@ -227,8 +229,9 @@ static int write_element_to_buffer(buffer_t buffer, int type_byte,
227229
* space has already been reserved.
228230
*
229231
* returns 0 on failure */
230-
static int _write_element_to_buffer(buffer_t buffer, int type_byte, PyObject* value,
231-
unsigned char check_keys, unsigned char first_attempt) {
232+
static int _write_element_to_buffer(buffer_t buffer, int type_byte,
233+
PyObject* value, unsigned char check_keys,
234+
unsigned char uuid_subtype, unsigned char first_attempt) {
232235
if (PyBool_Check(value)) {
233236
const long bool = PyInt_AsLong(value);
234237
const char c = bool ? 0x01 : 0x00;
@@ -270,7 +273,7 @@ static int _write_element_to_buffer(buffer_t buffer, int type_byte, PyObject* va
270273
return 1;
271274
} else if (PyDict_Check(value)) {
272275
*(buffer_get_buffer(buffer) + type_byte) = 0x03;
273-
return write_dict(buffer, value, check_keys, 0);
276+
return write_dict(buffer, value, check_keys, uuid_subtype, 0);
274277
} else if (PyList_Check(value) || PyTuple_Check(value)) {
275278
int start_position,
276279
length_location,
@@ -310,7 +313,8 @@ static int _write_element_to_buffer(buffer_t buffer, int type_byte, PyObject* va
310313
free(name);
311314

312315
item_value = PySequence_GetItem(value, i);
313-
if (!write_element_to_buffer(buffer, list_type_byte, item_value, check_keys, 1)) {
316+
if (!write_element_to_buffer(buffer, list_type_byte,
317+
item_value, check_keys, uuid_subtype, 1)) {
314318
Py_DECREF(item_value);
315319
return 0;
316320
}
@@ -371,7 +375,7 @@ static int _write_element_to_buffer(buffer_t buffer, int type_byte, PyObject* va
371375

372376
// UUID is always 16 bytes; the binary subtype comes from uuid_subtype
373377
int length = 16;
374-
const char subtype = 4;
378+
const char subtype = (const char)uuid_subtype;
375379

376380
PyObject* bytes;
377381

@@ -417,7 +421,7 @@ static int _write_element_to_buffer(buffer_t buffer, int type_byte, PyObject* va
417421
if (!scope) {
418422
return 0;
419423
}
420-
if (!write_dict(buffer, scope, 0, 0)) {
424+
if (!write_dict(buffer, scope, 0, uuid_subtype, 0)) {
421425
Py_DECREF(scope);
422426
return 0;
423427
}
@@ -494,7 +498,7 @@ static int _write_element_to_buffer(buffer_t buffer, int type_byte, PyObject* va
494498
if (!as_doc) {
495499
return 0;
496500
}
497-
if (!write_dict(buffer, as_doc, 0, 0)) {
501+
if (!write_dict(buffer, as_doc, 0, uuid_subtype, 0)) {
498502
Py_DECREF(as_doc);
499503
return 0;
500504
}
@@ -631,7 +635,7 @@ static int _write_element_to_buffer(buffer_t buffer, int type_byte, PyObject* va
631635
if (_reload_python_objects()) {
632636
return 0;
633637
}
634-
return write_element_to_buffer(buffer, type_byte, value, check_keys, 0);
638+
return write_element_to_buffer(buffer, type_byte, value, check_keys, uuid_subtype, 0);
635639
}
636640
{
637641
PyObject* errmsg = PyString_FromString("Cannot encode object: ");
@@ -673,7 +677,8 @@ static int check_key_name(const char* name,
673677
*
674678
* Returns 0 on failure */
675679
int write_pair(buffer_t buffer, const char* name, Py_ssize_t name_length,
676-
PyObject* value, unsigned char check_keys, unsigned char allow_id) {
680+
PyObject* value, unsigned char check_keys,
681+
unsigned char uuid_subtype, unsigned char allow_id) {
677682
int type_byte;
678683

679684
/* Don't write any _id elements unless we're explicitly told to -
@@ -694,15 +699,17 @@ int write_pair(buffer_t buffer, const char* name, Py_ssize_t name_length,
694699
if (!buffer_write_bytes(buffer, name, name_length + 1)) {
695700
return 0;
696701
}
697-
if (!write_element_to_buffer(buffer, type_byte, value, check_keys, 1)) {
702+
if (!write_element_to_buffer(buffer, type_byte, value,
703+
check_keys, uuid_subtype, 1)) {
698704
return 0;
699705
}
700706
return 1;
701707
}
702708

703709
int decode_and_write_pair(buffer_t buffer,
704710
PyObject* key, PyObject* value,
705-
unsigned char check_keys, unsigned char top_level) {
711+
unsigned char check_keys,
712+
unsigned char uuid_subtype, unsigned char top_level) {
706713
PyObject* encoded;
707714
if (PyUnicode_Check(key)) {
708715
result_t status;
@@ -754,7 +761,8 @@ int decode_and_write_pair(buffer_t buffer,
754761

755762
/* If top_level is True, don't allow writing _id here - it was already written. */
756763
if (!write_pair(buffer, PyString_AsString(encoded),
757-
PyString_Size(encoded), value, check_keys, !top_level)) {
764+
PyString_Size(encoded), value,
765+
check_keys, uuid_subtype, !top_level)) {
758766
Py_DECREF(encoded);
759767
return 0;
760768
}
@@ -764,7 +772,8 @@ int decode_and_write_pair(buffer_t buffer,
764772
}
765773

766774
/* returns 0 on failure */
767-
int write_dict(buffer_t buffer, PyObject* dict, unsigned char check_keys, unsigned char top_level) {
775+
int write_dict(buffer_t buffer, PyObject* dict,
776+
unsigned char check_keys, unsigned char uuid_subtype, unsigned char top_level) {
768777
PyObject* key;
769778
PyObject* iter;
770779
char zero = 0;
@@ -792,7 +801,7 @@ int write_dict(buffer_t buffer, PyObject* dict, unsigned char check_keys, unsign
792801
if (_id) {
793802
/* Don't bother checking keys, but do make sure we're allowed to
794803
* write _id */
795-
if (!write_pair(buffer, "_id", 3, _id, 0, 1)) {
804+
if (!write_pair(buffer, "_id", 3, _id, 0, uuid_subtype, 1)) {
796805
return 0;
797806
}
798807
}
@@ -810,7 +819,8 @@ int write_dict(buffer_t buffer, PyObject* dict, unsigned char check_keys, unsign
810819
Py_DECREF(iter);
811820
return 0;
812821
}
813-
if (!decode_and_write_pair(buffer, key, value, check_keys, top_level)) {
822+
if (!decode_and_write_pair(buffer, key, value,
823+
check_keys, uuid_subtype, top_level)) {
814824
Py_DECREF(key);
815825
Py_DECREF(iter);
816826
return 0;
@@ -832,9 +842,10 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) {
832842
PyObject* dict;
833843
PyObject* result;
834844
unsigned char check_keys;
845+
unsigned char uuid_subtype;
835846
buffer_t buffer;
836847

837-
if (!PyArg_ParseTuple(args, "Ob", &dict, &check_keys)) {
848+
if (!PyArg_ParseTuple(args, "Obb", &dict, &check_keys, &uuid_subtype)) {
838849
return NULL;
839850
}
840851

@@ -844,7 +855,7 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) {
844855
return NULL;
845856
}
846857

847-
if (!write_dict(buffer, dict, check_keys, 1)) {
858+
if (!write_dict(buffer, dict, check_keys, uuid_subtype, 1)) {
848859
buffer_free(buffer);
849860
return NULL;
850861
}

bson/binary.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@
2424
BINARY_SUBTYPE = 0
2525
"""BSON binary subtype for binary data.
2626
27-
This is becomming the default subtype and should be the most commonly
28-
used.
27+
This is the default subtype for binary data.
2928
3029
.. versionadded:: 1.5
3130
"""
@@ -39,13 +38,13 @@
3938
OLD_BINARY_SUBTYPE = 2
4039
"""Old BSON binary subtype for binary data.
4140
42-
This is still the default subtype, but that is changing to
43-
:data:`BINARY_SUBTYPE`.
41+
This is the old default subtype; the current
42+
default is :data:`BINARY_SUBTYPE`.
4443
4544
.. versionadded:: 1.7
4645
"""
4746

48-
UUID_SUBTYPE = 3
47+
UUID_SUBTYPE = 4
4948
"""BSON binary subtype for a UUID.
5049
5150
:class:`uuid.UUID` instances will automatically be encoded
@@ -54,6 +53,12 @@
5453
.. versionadded:: 1.5
5554
"""
5655

56+
OLD_UUID_SUBTYPE = 3
57+
"""Old BSON binary subtype for a UUID.
58+
59+
.. versionadded:: 2.0.1+
60+
"""
61+
5762
MD5_SUBTYPE = 5
5863
"""BSON binary subtype for an MD5 hash.
5964
@@ -156,7 +161,7 @@ class UUIDLegacy(Binary):
156161
def __new__(cls, obj):
157162
if not isinstance(obj, UUID):
158163
raise TypeError("obj must be an instance of uuid.UUID")
159-
self = Binary.__new__(cls, obj.bytes, 3)
164+
self = Binary.__new__(cls, obj.bytes, OLD_UUID_SUBTYPE)
160165
self.__uuid = obj
161166
return self
162167

0 commit comments

Comments
 (0)