Skip to content

Commit e775295

Browse files
committed
PYTHON-1367 Ignore uuid_representation when decoding BSON binary subtype 4.
1 parent e86742d commit e775295

File tree

7 files changed: 46 additions and 20 deletions.

bson/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,7 @@ def _get_binary(data, position, obj_end, opts, dummy1):
200200
end = position + length
201201
if length < 0 or end > obj_end:
202202
raise InvalidBSON('bad binary object length')
203-
if subtype in (3, 4):
203+
if subtype == 3:
204204
# Java Legacy
205205
uuid_representation = opts.uuid_representation
206206
if uuid_representation == JAVA_LEGACY:
@@ -213,6 +213,8 @@ def _get_binary(data, position, obj_end, opts, dummy1):
213213
else:
214214
value = uuid.UUID(bytes=data[position:end])
215215
return value, end
216+
if subtype == 4:
217+
return uuid.UUID(bytes=data[position:end]), end
216218
# Python3 special case. Decode subtype 0 to 'bytes'.
217219
if PY3 and subtype == 0:
218220
value = data[position:end]

bson/_cbsonmodule.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1960,13 +1960,13 @@ static PyObject* get_value(PyObject* self, PyObject* name, const char* buffer,
19601960
* From this point, we hold refs to args, kwargs, and data.
19611961
* If anything fails, goto uuiderror to clean them up.
19621962
*/
1963-
if (options->uuid_rep == CSHARP_LEGACY) {
1963+
if (subtype == 3 && options->uuid_rep == CSHARP_LEGACY) {
19641964
/* Legacy C# byte order */
19651965
if ((PyDict_SetItemString(kwargs, "bytes_le", data)) == -1)
19661966
goto uuiderror;
19671967
}
19681968
else {
1969-
if (options->uuid_rep == JAVA_LEGACY) {
1969+
if (subtype == 3 && options->uuid_rep == JAVA_LEGACY) {
19701970
/* Convert from legacy java byte order */
19711971
char big_endian[16];
19721972
_fix_java(buffer + *position, big_endian);

bson/binary.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -80,19 +80,23 @@
8080
"""The Java legacy UUID representation.
8181
8282
:class:`uuid.UUID` instances will automatically be encoded to
83-
and decoded from BSON binary, using the Java driver's legacy
84-
byte order with binary subtype :data:`OLD_UUID_SUBTYPE`.
83+
and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`,
84+
using the Java driver's legacy byte order.
8585
86+
.. versionchanged:: 3.6
87+
BSON binary subtype 4 is decoded using RFC-4122 byte order.
8688
.. versionadded:: 2.3
8789
"""
8890

8991
CSHARP_LEGACY = 6
9092
"""The C#/.net legacy UUID representation.
9193
9294
:class:`uuid.UUID` instances will automatically be encoded to
93-
and decoded from BSON binary, using the C# driver's legacy
94-
byte order and binary subtype :data:`OLD_UUID_SUBTYPE`.
95+
and decoded from BSON binary subtype :data:`OLD_UUID_SUBTYPE`,
96+
using the C# driver's legacy byte order.
9597
98+
.. versionchanged:: 3.6
99+
BSON binary subtype 4 is decoded using RFC-4122 byte order.
96100
.. versionadded:: 2.3
97101
"""
98102

doc/changelog.rst

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,16 @@ Highlights include:
1818
:meth:`~pymongo.collection.Collection.aggregate_raw_batches` for use with
1919
external libraries that can parse raw batches of BSON data.
2020

21+
Breaking changes include:
22+
23+
- BSON binary subtype 4 is decoded using RFC-4122 byte order regardless
24+
of the UUID representation. This is a change in behavior for applications
25+
that use UUID representation :data:`bson.binary.JAVA_LEGACY` or
26+
:data:`bson.binary.CSHARP_LEGACY` to decode BSON binary subtype 4. Other
27+
UUID representations, :data:`bson.binary.PYTHON_LEGACY` (the default) and
28+
:data:`bson.binary.STANDARD`, and the decoding of BSON binary subtype 3
29+
are unchanged.
30+
2131
Changes in Version 3.5.1
2232
------------------------
2333

test/test_binary.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -135,6 +135,15 @@ def test_hash(self):
135135
self.assertNotEqual(hash(one), hash(two))
136136
self.assertEqual(hash(Binary(b"hello world", 42)), hash(two))
137137

138+
def test_uuid_subtype_4(self):
139+
"""uuid_representation should be ignored when decoding subtype 4."""
140+
expected_uuid = uuid.uuid4()
141+
doc = {"uuid": Binary(expected_uuid.bytes, 4)}
142+
encoded = bson.BSON.encode(doc)
143+
for uuid_representation in ALL_UUID_REPRESENTATIONS:
144+
options = CodecOptions(uuid_representation=uuid_representation)
145+
self.assertEqual(expected_uuid, encoded.decode(options)["uuid"])
146+
138147
def test_legacy_java_uuid(self):
139148
# Test decoding
140149
data = self.java_data

test/test_change_stream.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,8 @@
2424
sys.path[0:0] = ['']
2525

2626
from bson import BSON, ObjectId, SON
27-
from bson.binary import (Binary,
28-
CSHARP_LEGACY,
29-
JAVA_LEGACY,
27+
from bson.binary import (ALL_UUID_REPRESENTATIONS,
28+
Binary,
3029
STANDARD,
3130
PYTHON_LEGACY)
3231
from bson.raw_bson import DEFAULT_RAW_BSON_OPTIONS, RawBSONDocument
@@ -314,8 +313,7 @@ def test_raw(self):
314313

315314
def test_uuid_representations(self):
316315
"""Test with uuid document _ids and different uuid_representation."""
317-
for uuid_representation in (STANDARD, PYTHON_LEGACY, CSHARP_LEGACY,
318-
JAVA_LEGACY):
316+
for uuid_representation in ALL_UUID_REPRESENTATIONS:
319317
for id_subtype in (STANDARD, PYTHON_LEGACY):
320318
resume_token = None
321319
options = self.coll.codec_options.with_options(

test/test_json_util.py

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,9 @@
3333
from bson import json_util, EPOCH_AWARE, EPOCH_NAIVE, SON
3434
from bson.json_util import (DatetimeRepresentation,
3535
STRICT_JSON_OPTIONS)
36-
from bson.binary import (Binary, MD5_SUBTYPE, USER_DEFINED_SUBTYPE,
37-
JAVA_LEGACY, CSHARP_LEGACY, STANDARD)
36+
from bson.binary import (ALL_UUID_REPRESENTATIONS, Binary, MD5_SUBTYPE,
37+
USER_DEFINED_SUBTYPE, JAVA_LEGACY, CSHARP_LEGACY,
38+
STANDARD)
3839
from bson.code import Code
3940
from bson.dbref import DBRef
4041
from bson.int64 import Int64
@@ -268,12 +269,14 @@ def test_uuid(self):
268269
strict_uuid=True, uuid_representation=STANDARD)))
269270
self.assertEqual(doc, json_util.loads(
270271
'{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "03"}}'))
271-
self.assertEqual(doc, json_util.loads(
272-
'{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": "04"}}'))
273-
self.round_trip(doc, json_options=json_util.JSONOptions(
274-
strict_uuid=True, uuid_representation=JAVA_LEGACY))
275-
self.round_trip(doc, json_options=json_util.JSONOptions(
276-
strict_uuid=True, uuid_representation=CSHARP_LEGACY))
272+
for uuid_representation in ALL_UUID_REPRESENTATIONS:
273+
options = json_util.JSONOptions(
274+
strict_uuid=True, uuid_representation=uuid_representation)
275+
self.round_trip(doc, json_options=options)
276+
# Ignore UUID representation when decoding BSON binary subtype 4.
277+
self.assertEqual(doc, json_util.loads(
278+
'{"uuid": {"$binary": "9HrBC1jMQ3KlZw4CssPUeQ==", "$type": '
279+
'"04"}}', json_options=options))
277280

278281
def test_binary(self):
279282
if PY3:

0 commit comments

Comments
 (0)