Skip to content

Commit d133396

Browse files
author
A. Jesse Jiryu Davis
committed
Revert "Capture all BSON decode errors and wrap with InvalidBSON. PYTHON-494"
This reverts commit ba66a2d.
1 parent 9d9ac1c commit d133396

File tree

3 files changed

+61
-107
lines changed

3 files changed

+61
-107
lines changed

bson/__init__.py

Lines changed: 12 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,7 @@ def _dict_to_bson(dict, check_keys, uuid_subtype, top_level=True):
484484
_dict_to_bson = _cbson._dict_to_bson
485485

486486

487+
487488
def decode_all(data, as_class=dict,
488489
tz_aware=True, uuid_subtype=OLD_UUID_SUBTYPE):
489490
"""Decode BSON data to multiple documents.
@@ -503,25 +504,17 @@ def decode_all(data, as_class=dict,
503504
docs = []
504505
position = 0
505506
end = len(data) - 1
506-
try:
507-
while position < end:
508-
obj_size = struct.unpack("<i", data[position:position + 4])[0]
509-
if len(data) - position < obj_size:
510-
raise InvalidBSON("objsize too large")
511-
if data[position + obj_size - 1:position + obj_size] != ZERO:
512-
raise InvalidBSON("bad eoo")
513-
elements = data[position + 4:position + obj_size - 1]
514-
position += obj_size
515-
docs.append(_elements_to_dict(elements, as_class,
516-
tz_aware, uuid_subtype))
517-
return docs
518-
except InvalidBSON:
519-
raise
520-
except Exception:
521-
# Change exception type to InvalidBSON but preserve traceback.
522-
exc_type, exc_value, exc_tb = sys.exc_info()
523-
raise InvalidBSON, str(exc_value), exc_tb
524-
507+
while position < end:
508+
obj_size = struct.unpack("<i", data[position:position + 4])[0]
509+
if len(data) - position < obj_size:
510+
raise InvalidBSON("objsize too large")
511+
if data[position + obj_size - 1:position + obj_size] != ZERO:
512+
raise InvalidBSON("bad eoo")
513+
elements = data[position + 4:position + obj_size - 1]
514+
position += obj_size
515+
docs.append(_elements_to_dict(elements, as_class,
516+
tz_aware, uuid_subtype))
517+
return docs
525518
if _use_c:
526519
decode_all = _cbson.decode_all
527520

bson/_cbsonmodule.c

Lines changed: 47 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1336,7 +1336,8 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
13361336
unsigned char tz_aware, unsigned char uuid_subtype) {
13371337
struct module_state *state = GETSTATE(self);
13381338

1339-
PyObject* value = NULL;
1339+
PyObject* value;
1340+
PyObject* error;
13401341
switch (type) {
13411342
case 1:
13421343
{
@@ -1346,6 +1347,9 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
13461347
}
13471348
memcpy(&d, buffer + *position, 8);
13481349
value = PyFloat_FromDouble(d);
1350+
if (!value) {
1351+
return NULL;
1352+
}
13491353
*position += 8;
13501354
break;
13511355
}
@@ -1358,6 +1362,9 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
13581362
}
13591363
*position += 4;
13601364
value = PyUnicode_DecodeUTF8(buffer + *position, value_length, "strict");
1365+
if (!value) {
1366+
return NULL;
1367+
}
13611368
*position += value_length + 1;
13621369
break;
13631370
}
@@ -1371,10 +1378,10 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
13711378
}
13721379
value = elements_to_dict(self, buffer + *position + 4,
13731380
size - 5, as_class, tz_aware, uuid_subtype);
1374-
13751381
if (!value) {
1376-
goto invalid;
1382+
return NULL;
13771383
}
1384+
13781385
/* Decoding for DBRefs */
13791386
collection = PyDict_GetItemString(value, "$ref");
13801387
if (collection) { /* DBRef */
@@ -1410,6 +1417,9 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
14101417
Py_DECREF(id);
14111418
Py_DECREF(collection);
14121419
Py_DECREF(database);
1420+
if (!value) {
1421+
return NULL;
1422+
}
14131423
}
14141424

14151425
*position += size;
@@ -1429,7 +1439,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
14291439

14301440
value = PyList_New(0);
14311441
if (!value) {
1432-
goto invalid;
1442+
return NULL;
14331443
}
14341444
while (*position < end) {
14351445
PyObject* to_append;
@@ -1446,7 +1456,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
14461456
max - (int)key_size, as_class, tz_aware, uuid_subtype);
14471457
if (!to_append) {
14481458
Py_DECREF(value);
1449-
goto invalid;
1459+
return NULL;
14501460
}
14511461
PyList_Append(value, to_append);
14521462
Py_DECREF(to_append);
@@ -1485,20 +1495,20 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
14851495
}
14861496
#endif
14871497
if (!data) {
1488-
goto invalid;
1498+
return NULL;
14891499
}
14901500
if ((subtype == 3 || subtype == 4) && state->UUID) { // Encode as UUID, not Binary
14911501
PyObject* kwargs;
14921502
PyObject* args = PyTuple_New(0);
14931503
if (!args) {
14941504
Py_DECREF(data);
1495-
goto invalid;
1505+
return NULL;
14961506
}
14971507
kwargs = PyDict_New();
14981508
if (!kwargs) {
14991509
Py_DECREF(data);
15001510
Py_DECREF(args);
1501-
goto invalid;
1511+
return NULL;
15021512
}
15031513

15041514
assert(length == 16); // UUID should always be 16 bytes
@@ -1532,6 +1542,10 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
15321542
Py_DECREF(args);
15331543
Py_DECREF(kwargs);
15341544
Py_DECREF(data);
1545+
if (!value) {
1546+
return NULL;
1547+
}
1548+
15351549
*position += length + 5;
15361550
break;
15371551

@@ -1554,6 +1568,9 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
15541568
value = PyObject_CallFunctionObjArgs(state->Binary, data, st, NULL);
15551569
Py_DECREF(st);
15561570
Py_DECREF(data);
1571+
if (!value) {
1572+
return NULL;
1573+
}
15571574
*position += length + 5;
15581575
break;
15591576
}
@@ -1574,6 +1591,9 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
15741591
#else
15751592
value = PyObject_CallFunction(state->ObjectId, "s#", buffer + *position, 12);
15761593
#endif
1594+
if (!value) {
1595+
return NULL;
1596+
}
15771597
*position += 12;
15781598
break;
15791599
}
@@ -1600,29 +1620,29 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
16001620
}
16011621

16021622
if (!naive) {
1603-
goto invalid;
1623+
return NULL;
16041624
}
16051625
replace = PyObject_GetAttrString(naive, "replace");
16061626
Py_DECREF(naive);
16071627
if (!replace) {
1608-
goto invalid;
1628+
return NULL;
16091629
}
16101630
args = PyTuple_New(0);
16111631
if (!args) {
16121632
Py_DECREF(replace);
1613-
goto invalid;
1633+
return NULL;
16141634
}
16151635
kwargs = PyDict_New();
16161636
if (!kwargs) {
16171637
Py_DECREF(replace);
16181638
Py_DECREF(args);
1619-
goto invalid;
1639+
return NULL;
16201640
}
16211641
if (PyDict_SetItemString(kwargs, "tzinfo", state->UTC) == -1) {
16221642
Py_DECREF(replace);
16231643
Py_DECREF(args);
16241644
Py_DECREF(kwargs);
1625-
goto invalid;
1645+
return NULL;
16261646
}
16271647
value = PyObject_Call(replace, args, kwargs);
16281648
Py_DECREF(replace);
@@ -1641,7 +1661,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
16411661
}
16421662
pattern = PyUnicode_DecodeUTF8(buffer + *position, pattern_length, "strict");
16431663
if (!pattern) {
1644-
goto invalid;
1664+
return NULL;
16451665
}
16461666
*position += (int)pattern_length + 1;
16471667
if ((flags_length = strlen(buffer + *position)) > BSON_MAX_SIZE) {
@@ -1687,14 +1707,14 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
16871707
collection = PyUnicode_DecodeUTF8(buffer + *position,
16881708
coll_length, "strict");
16891709
if (!collection) {
1690-
goto invalid;
1710+
return NULL;
16911711
}
16921712
*position += (int)coll_length + 1;
16931713

16941714
id = PyObject_CallFunction(state->ObjectId, "s#", buffer + *position, 12);
16951715
if (!id) {
16961716
Py_DECREF(collection);
1697-
goto invalid;
1717+
return NULL;
16981718
}
16991719
*position += 12;
17001720
value = PyObject_CallFunctionObjArgs(state->DBRef, collection, id, NULL);
@@ -1712,7 +1732,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
17121732
*position += 4;
17131733
code = PyUnicode_DecodeUTF8(buffer + *position, value_length, "strict");
17141734
if (!code) {
1715-
goto invalid;
1735+
return NULL;
17161736
}
17171737
*position += value_length + 1;
17181738
value = PyObject_CallFunctionObjArgs(state->Code, code, NULL, NULL);
@@ -1733,7 +1753,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
17331753
}
17341754
code = PyUnicode_DecodeUTF8(buffer + *position, code_length, "strict");
17351755
if (!code) {
1736-
goto invalid;
1756+
return NULL;
17371757
}
17381758
*position += (int)code_length + 1;
17391759

@@ -1742,7 +1762,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
17421762
(PyObject*)&PyDict_Type, tz_aware, uuid_subtype);
17431763
if (!scope) {
17441764
Py_DECREF(code);
1745-
goto invalid;
1765+
return NULL;
17461766
}
17471767
*position += scope_size;
17481768

@@ -1764,7 +1784,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
17641784
value = PyInt_FromLong(i);
17651785
#endif
17661786
if (!value) {
1767-
goto invalid;
1787+
return NULL;
17681788
}
17691789
*position += 4;
17701790
break;
@@ -1779,7 +1799,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
17791799
memcpy(&time, buffer + *position + 4, 4);
17801800
value = PyObject_CallFunction(state->Timestamp, "II", time, inc);
17811801
if (!value) {
1782-
goto invalid;
1802+
return NULL;
17831803
}
17841804
*position += 8;
17851805
break;
@@ -1793,7 +1813,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
17931813
memcpy(&ll, buffer + *position, 8);
17941814
value = PyLong_FromLongLong(ll);
17951815
if (!value) {
1796-
goto invalid;
1816+
return NULL;
17971817
}
17981818
*position += 8;
17991819
break;
@@ -1819,44 +1839,14 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
18191839
return NULL;
18201840
}
18211841
}
1822-
1823-
if (value) {
1824-
return value;
1825-
}
1842+
return value;
18261843

18271844
invalid:
18281845

1829-
/* Wrap any non-InvalidBSON errors in InvalidBSON. */
1830-
if (PyErr_Occurred()) {
1831-
/* Calling _error clears the error state, so fetch it first. */
1832-
PyObject *etype, *evalue, *etrace, *InvalidBSON;
1833-
PyErr_Fetch(&etype, &evalue, &etrace);
1834-
InvalidBSON = _error("InvalidBSON");
1835-
if (InvalidBSON) {
1836-
if (!PyErr_GivenExceptionMatches(etype, InvalidBSON)) {
1837-
/* Raise InvalidBSON(str(e)). */
1838-
PyObject *msg = NULL;
1839-
Py_DECREF(etype);
1840-
etype = InvalidBSON;
1841-
1842-
if (evalue) {
1843-
msg = PyObject_Str(evalue);
1844-
Py_DECREF(evalue);
1845-
evalue = msg;
1846-
}
1847-
PyErr_NormalizeException(&etype, &evalue, &etrace);
1848-
}
1849-
}
1850-
/* Steals references to args. */
1851-
PyErr_Restore(etype, evalue, etrace);
1852-
Py_XDECREF(InvalidBSON);
1853-
return NULL;
1854-
} else {
1855-
PyObject *InvalidBSON = _error("InvalidBSON");
1856-
if (InvalidBSON) {
1857-
PyErr_SetNone(InvalidBSON);
1858-
Py_DECREF(InvalidBSON);
1859-
}
1846+
error = _error("InvalidBSON");
1847+
if (error) {
1848+
PyErr_SetNone(error);
1849+
Py_DECREF(error);
18601850
}
18611851
return NULL;
18621852
}

test/test_bson.py

Lines changed: 2 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,10 @@
1616

1717
"""Test the bson module."""
1818

19+
import unittest
1920
import datetime
2021
import re
2122
import sys
22-
import traceback
23-
import unittest
2423
try:
2524
import uuid
2625
should_test_uuid = True
@@ -42,8 +41,7 @@
4241
from bson.son import SON
4342
from bson.timestamp import Timestamp
4443
from bson.errors import (InvalidDocument,
45-
InvalidStringData,
46-
InvalidBSON)
44+
InvalidStringData)
4745
from bson.max_key import MaxKey
4846
from bson.min_key import MinKey
4947
from bson.tz_util import (FixedOffset,
@@ -450,32 +448,5 @@ def test_ordered_dict(self):
450448
d = OrderedDict([("one", 1), ("two", 2), ("three", 3), ("four", 4)])
451449
self.assertEqual(d, BSON.encode(d).decode(as_class=OrderedDict))
452450

453-
def test_exception_wrapping(self):
454-
# No matter what exception is raised while trying to decode BSON,
455-
# the final exception always matches InvalidBSON and the original
456-
# traceback is preserved.
457-
458-
# Invalid Python regex, though valid PCRE: {'r': /[\w-\.]/}
459-
# Will cause an error in re.compile().
460-
bad_doc = b('"\x00\x00\x00\x07_id\x00R\x013\xd4S1\xe3\xd3\xd6Sgs'
461-
'\x0br\x00[\\w-\\.]\x00\x00\x00')
462-
463-
try:
464-
decode_all(bad_doc)
465-
except InvalidBSON:
466-
exc_type, exc_value, exc_tb = sys.exc_info()
467-
# Original re error was captured and wrapped in InvalidBSON.
468-
self.assertEqual(exc_value.args[0], 'bad character range')
469-
470-
# Traceback includes bson module's call into re module.
471-
for filename, lineno, fname, text in traceback.extract_tb(exc_tb):
472-
if filename.endswith('re.py') and fname == 'compile':
473-
# Traceback was correctly preserved.
474-
break
475-
else:
476-
self.fail('Traceback not captured')
477-
else:
478-
self.fail('InvalidBSON not raised')
479-
480451
if __name__ == "__main__":
481452
unittest.main()

0 commit comments

Comments
 (0)