Skip to content

Commit ab4d63b

Browse files
committed
Better handling of corrupt/invalid BSON PYTHON-571
1 parent 12283dc commit ab4d63b

File tree

3 files changed

+155
-42
lines changed

3 files changed

+155
-42
lines changed

bson/__init__.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,13 +139,19 @@ def _get_number(data, position, as_class, tz_aware, uuid_subtype):
139139

140140

141141
def _get_string(data, position, as_class, tz_aware, uuid_subtype):
    """Decode the length-prefixed string that starts at `position` in `data`.

    The element is a little-endian int32 length followed by that many
    bytes, the last of which must be the terminator ZERO.

    `as_class`, `tz_aware` and `uuid_subtype` are accepted but not used in
    this function.

    Returns whatever `_get_c_string` returns for the string bytes without
    their terminator.

    Raises InvalidBSON if the encoded length is not positive, if it runs
    past the end of `data`, or if the final byte is not the terminator.
    """
    length = struct.unpack("<i", data[position:position + 4])[0]
    # The length includes the terminator, so anything below 1 is corrupt.
    # Rejecting it here also stops the terminator check below from reading
    # a byte that lies before the string itself.
    if length <= 0 or (len(data) - position - 4) < length:
        raise InvalidBSON("invalid string length")
    position += 4
    end = position + length
    # Compare a one-byte slice, as _get_object does for its own terminator:
    # indexing a bytes object yields an int on Python 3, which would never
    # compare equal to ZERO (assumed to be a one-byte string).
    if data[end - 1:end] != ZERO:
        raise InvalidBSON("invalid end of string")
    return _get_c_string(data, position, length - 1)
145149

146150

147151
def _get_object(data, position, as_class, tz_aware, uuid_subtype):
148152
obj_size = struct.unpack("<i", data[position:position + 4])[0]
153+
if data[position + obj_size - 1:position + obj_size] != ZERO:
154+
raise InvalidBSON("bad eoo")
149155
encoded = data[position + 4:position + obj_size - 1]
150156
object = _elements_to_dict(encoded, as_class, tz_aware, uuid_subtype)
151157
position += obj_size

bson/_cbsonmodule.c

Lines changed: 134 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ static struct module_state _state;
9797
#define JAVA_LEGACY 5
9898
#define CSHARP_LEGACY 6
9999
#define BSON_MAX_SIZE 2147483647
100+
/* The smallest possible BSON document, i.e. "{}" */
101+
#define BSON_MIN_SIZE 5
100102

101103
/* Get an error class from the bson.errors module.
102104
*
@@ -1430,7 +1432,7 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) {
14301432
return result;
14311433
}
14321434

1433-
static PyObject* get_value(PyObject* self, const char* buffer, int* position,
1435+
static PyObject* get_value(PyObject* self, const char* buffer, unsigned* position,
14341436
int type, int max, PyObject* as_class,
14351437
unsigned char tz_aware, unsigned char uuid_subtype) {
14361438
struct module_state *state = GETSTATE(self);
@@ -1455,28 +1457,44 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
14551457
case 2:
14561458
case 14:
14571459
{
1458-
int value_length = ((int*)(buffer + *position))[0] - 1;
1459-
if (max < value_length) {
1460+
unsigned value_length;
1461+
if (max < 4) {
1462+
goto invalid;
1463+
}
1464+
memcpy(&value_length, buffer + *position, 4);
1465+
/* Encoded string length + string */
1466+
if (max < 4 + value_length) {
14601467
goto invalid;
14611468
}
14621469
*position += 4;
1463-
value = PyUnicode_DecodeUTF8(buffer + *position, value_length, "strict");
1470+
/* Strings must end in \0 */
1471+
if (buffer[*position + value_length - 1]) {
1472+
goto invalid;
1473+
}
1474+
value = PyUnicode_DecodeUTF8(buffer + *position, value_length - 1, "strict");
14641475
if (!value) {
14651476
return NULL;
14661477
}
1467-
*position += value_length + 1;
1478+
*position += value_length;
14681479
break;
14691480
}
14701481
case 3:
14711482
{
14721483
PyObject* collection;
1473-
int size;
1484+
unsigned size;
1485+
if (max < 4) {
1486+
goto invalid;
1487+
}
14741488
memcpy(&size, buffer + *position, 4);
1475-
if (size < 0 || max < size) {
1489+
if (size < BSON_MIN_SIZE || max < size) {
1490+
goto invalid;
1491+
}
1492+
/* Check for bad eoo */
1493+
if (buffer[*position + size - 1]) {
14761494
goto invalid;
14771495
}
14781496
value = elements_to_dict(self, buffer + *position + 4,
1479-
size - 5, as_class, tz_aware, uuid_subtype);
1497+
(int)size - 5, as_class, tz_aware, uuid_subtype);
14801498
if (!value) {
14811499
return NULL;
14821500
}
@@ -1530,14 +1548,20 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
15301548
}
15311549
case 4:
15321550
{
1533-
int size,
1534-
end;
1551+
unsigned size, end;
15351552

1553+
if (max < 4) {
1554+
goto invalid;
1555+
}
15361556
memcpy(&size, buffer + *position, 4);
15371557
if (max < size) {
15381558
goto invalid;
15391559
}
15401560
end = *position + size - 1;
1561+
/* Check for bad eoo */
1562+
if (buffer[end]) {
1563+
goto invalid;
1564+
}
15411565
*position += 4;
15421566

15431567
value = PyList_New(0);
@@ -1549,14 +1573,19 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
15491573

15501574
int bson_type = (int)buffer[(*position)++];
15511575
size_t key_size = strlen(buffer + *position);
1552-
if (key_size > BSON_MAX_SIZE) {
1576+
if (max < (int)key_size) {
15531577
Py_DECREF(value);
15541578
goto invalid;
15551579
}
15561580
/* just skip the key, they're in order. */
1557-
*position += (int)key_size + 1;
1581+
*position += (unsigned)key_size + 1;
1582+
if (Py_EnterRecursiveCall(" while decoding a list value")) {
1583+
Py_DECREF(value);
1584+
return NULL;
1585+
}
15581586
to_append = get_value(self, buffer, position, bson_type,
15591587
max - (int)key_size, as_class, tz_aware, uuid_subtype);
1588+
Py_LeaveRecursiveCall();
15601589
if (!to_append) {
15611590
Py_DECREF(value);
15621591
return NULL;
@@ -1572,8 +1601,11 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
15721601
PyObject* data;
15731602
PyObject* st;
15741603
PyObject* type_to_create;
1575-
int length, subtype;
1604+
unsigned length, subtype;
15761605

1606+
if (max < 4) {
1607+
goto invalid;
1608+
}
15771609
memcpy(&length, buffer + *position, 4);
15781610
if (max < length) {
15791611
goto invalid;
@@ -1779,7 +1811,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
17791811
if (!pattern) {
17801812
return NULL;
17811813
}
1782-
*position += (int)pattern_length + 1;
1814+
*position += (unsigned)pattern_length + 1;
17831815
if ((flags_length = strlen(buffer + *position)) > BSON_MAX_SIZE) {
17841816
Py_DECREF(pattern);
17851817
goto invalid;
@@ -1804,7 +1836,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
18041836
flags |= 64;
18051837
}
18061838
}
1807-
*position += (int)flags_length + 1;
1839+
*position += (unsigned)flags_length + 1;
18081840
if ((compile_func = _get_object(state->RECompile, "re", "compile"))) {
18091841
value = PyObject_CallFunction(compile_func, "Oi", pattern, flags);
18101842
Py_DECREF(compile_func);
@@ -1814,23 +1846,32 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
18141846
}
18151847
case 12:
18161848
{
1817-
size_t coll_length;
1849+
unsigned coll_length;
18181850
PyObject* collection;
18191851
PyObject* id = NULL;
18201852
PyObject* objectid_type;
18211853
PyObject* dbref_type;
18221854

1855+
if (max < 4) {
1856+
goto invalid;
1857+
}
1858+
memcpy(&coll_length, buffer + *position, 4);
1859+
/* Encoded string length + string + 12 byte ObjectId */
1860+
if (max < 4 + coll_length + 12) {
1861+
goto invalid;
1862+
}
18231863
*position += 4;
1824-
coll_length = strlen(buffer + *position);
1825-
if (coll_length > BSON_MAX_SIZE || max < (int)coll_length + 12) {
1864+
/* Strings must end in \0 */
1865+
if (buffer[*position + coll_length - 1]) {
18261866
goto invalid;
18271867
}
1868+
18281869
collection = PyUnicode_DecodeUTF8(buffer + *position,
1829-
coll_length, "strict");
1870+
coll_length - 1, "strict");
18301871
if (!collection) {
18311872
return NULL;
18321873
}
1833-
*position += (int)coll_length + 1;
1874+
*position += coll_length;
18341875

18351876
if ((objectid_type = _get_object(state->ObjectId, "bson.objectid", "ObjectId"))) {
18361877
id = PyObject_CallFunction(objectid_type, "s#", buffer + *position, 12);
@@ -1853,16 +1894,25 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
18531894
{
18541895
PyObject* code;
18551896
PyObject* code_type;
1856-
int value_length = ((int*)(buffer + *position))[0] - 1;
1857-
if (max < value_length) {
1897+
unsigned value_length;
1898+
if (max < 4) {
1899+
goto invalid;
1900+
}
1901+
memcpy(&value_length, buffer + *position, 4);
1902+
/* Encoded string length + string */
1903+
if (max < 4 + value_length) {
18581904
goto invalid;
18591905
}
18601906
*position += 4;
1861-
code = PyUnicode_DecodeUTF8(buffer + *position, value_length, "strict");
1907+
/* Strings must end in \0 */
1908+
if (buffer[*position + value_length - 1]) {
1909+
goto invalid;
1910+
}
1911+
code = PyUnicode_DecodeUTF8(buffer + *position, value_length - 1, "strict");
18621912
if (!code) {
18631913
return NULL;
18641914
}
1865-
*position += value_length + 1;
1915+
*position += value_length;
18661916
if ((code_type = _get_object(state->Code, "bson.code", "Code"))) {
18671917
value = PyObject_CallFunctionObjArgs(code_type, code, NULL, NULL);
18681918
Py_DECREF(code_type);
@@ -1872,25 +1922,56 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
18721922
}
18731923
case 15:
18741924
{
1875-
size_t code_length;
1876-
int scope_size;
1925+
unsigned c_w_s_size;
1926+
unsigned code_size;
1927+
unsigned scope_size;
18771928
PyObject* code;
18781929
PyObject* scope;
18791930
PyObject* code_type;
18801931

1881-
*position += 8;
1882-
code_length = strlen(buffer + *position);
1883-
if (code_length > BSON_MAX_SIZE || max < 8 + (int)code_length) {
1932+
if (max < 8) {
1933+
goto invalid;
1934+
}
1935+
1936+
memcpy(&c_w_s_size, buffer + *position, 4);
1937+
*position += 4;
1938+
1939+
if (max < c_w_s_size) {
1940+
goto invalid;
1941+
}
1942+
1943+
memcpy(&code_size, buffer + *position, 4);
1944+
/* code_w_scope length + code length + code + scope length */
1945+
if (max < 4 + 4 + code_size + 4) {
1946+
goto invalid;
1947+
}
1948+
*position += 4;
1949+
/* Strings must end in \0 */
1950+
if (buffer[*position + code_size - 1]) {
18841951
goto invalid;
18851952
}
1886-
code = PyUnicode_DecodeUTF8(buffer + *position, code_length, "strict");
1953+
code = PyUnicode_DecodeUTF8(buffer + *position, code_size - 1, "strict");
18871954
if (!code) {
18881955
return NULL;
18891956
}
1890-
*position += (int)code_length + 1;
1957+
*position += code_size;
18911958

18921959
memcpy(&scope_size, buffer + *position, 4);
1893-
scope = elements_to_dict(self, buffer + *position + 4, scope_size - 5,
1960+
if (scope_size < BSON_MIN_SIZE) {
1961+
Py_DECREF(code);
1962+
goto invalid;
1963+
}
1964+
/* code length + code + scope length + scope */
1965+
if ((4 + code_size + 4 + scope_size) != c_w_s_size) {
1966+
Py_DECREF(code);
1967+
goto invalid;
1968+
}
1969+
1970+
/* Check for bad eoo */
1971+
if (buffer[*position + scope_size - 1]) {
1972+
goto invalid;
1973+
}
1974+
scope = elements_to_dict(self, buffer + *position + 4, (int)scope_size - 5,
18941975
(PyObject*)&PyDict_Type, tz_aware, uuid_subtype);
18951976
if (!scope) {
18961977
Py_DECREF(code);
@@ -1989,16 +2070,17 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
19892070

19902071
error = _error("InvalidBSON");
19912072
if (error) {
1992-
PyErr_SetNone(error);
2073+
PyErr_SetString(error,
2074+
"invalid length or type code");
19932075
Py_DECREF(error);
19942076
}
19952077
return NULL;
19962078
}
19972079

1998-
static PyObject* elements_to_dict(PyObject* self, const char* string, int max,
2080+
static PyObject* _elements_to_dict(PyObject* self, const char* string, int max,
19992081
PyObject* as_class, unsigned char tz_aware,
20002082
unsigned char uuid_subtype) {
2001-
int position = 0;
2083+
unsigned position = 0;
20022084
PyObject* dict = PyObject_CallObject(as_class, NULL);
20032085
if (!dict) {
20042086
return NULL;
@@ -2038,6 +2120,18 @@ static PyObject* elements_to_dict(PyObject* self, const char* string, int max,
20382120
return dict;
20392121
}
20402122

2123+
/* Recursion-guarded wrapper around _elements_to_dict.
 *
 * Enters one recursion level via Py_EnterRecursiveCall before delegating,
 * and leaves it again once the delegate has returned. If the recursion
 * check fails, NULL is returned and _elements_to_dict is never called;
 * otherwise the delegate's result is returned unchanged.
 */
static PyObject* elements_to_dict(PyObject* self, const char* string, int max,
                                  PyObject* as_class, unsigned char tz_aware,
                                  unsigned char uuid_subtype) {
    PyObject* decoded = NULL;

    if (Py_EnterRecursiveCall(" while decoding a BSON document") == 0) {
        decoded = _elements_to_dict(self, string, max,
                                    as_class, tz_aware, uuid_subtype);
        Py_LeaveRecursiveCall();
    }
    return decoded;
}
2134+
20412135
static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
20422136
int size;
20432137
Py_ssize_t total_size;
@@ -2068,7 +2162,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
20682162
#else
20692163
total_size = PyString_Size(bson);
20702164
#endif
2071-
if (total_size < 5) {
2165+
if (total_size < BSON_MIN_SIZE) {
20722166
PyObject* InvalidBSON = _error("InvalidBSON");
20732167
if (InvalidBSON) {
20742168
PyErr_SetString(InvalidBSON,
@@ -2088,7 +2182,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
20882182
}
20892183

20902184
memcpy(&size, string, 4);
2091-
if (size < 0) {
2185+
if (size < BSON_MIN_SIZE) {
20922186
PyObject* InvalidBSON = _error("InvalidBSON");
20932187
if (InvalidBSON) {
20942188
PyErr_SetString(InvalidBSON, "invalid message size");
@@ -2097,7 +2191,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
20972191
return NULL;
20982192
}
20992193

2100-
if (total_size < size) {
2194+
if (total_size < size || total_size > BSON_MAX_SIZE) {
21012195
PyObject* InvalidBSON = _error("InvalidBSON");
21022196
if (InvalidBSON) {
21032197
PyErr_SetString(InvalidBSON, "objsize too large");
@@ -2173,7 +2267,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
21732267
return NULL;
21742268

21752269
while (total_size > 0) {
2176-
if (total_size < 5) {
2270+
if (total_size < BSON_MIN_SIZE) {
21772271
PyObject* InvalidBSON = _error("InvalidBSON");
21782272
if (InvalidBSON) {
21792273
PyErr_SetString(InvalidBSON,
@@ -2185,7 +2279,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
21852279
}
21862280

21872281
memcpy(&size, string, 4);
2188-
if (size < 0) {
2282+
if (size < BSON_MIN_SIZE) {
21892283
PyObject* InvalidBSON = _error("InvalidBSON");
21902284
if (InvalidBSON) {
21912285
PyErr_SetString(InvalidBSON, "invalid message size");

0 commit comments

Comments
 (0)