@@ -97,6 +97,8 @@ static struct module_state _state;
97
97
#define JAVA_LEGACY 5
98
98
#define CSHARP_LEGACY 6
99
99
#define BSON_MAX_SIZE 2147483647
100
+ /* The smallest possible BSON document, i.e. "{}": a 4-byte length prefix followed by the single trailing \0 (eoo) byte. */
101
+ #define BSON_MIN_SIZE 5
100
102
101
103
/* Get an error class from the bson.errors module.
102
104
*
@@ -1430,7 +1432,7 @@ static PyObject* _cbson_dict_to_bson(PyObject* self, PyObject* args) {
1430
1432
return result ;
1431
1433
}
1432
1434
1433
- static PyObject * get_value (PyObject * self , const char * buffer , int * position ,
1435
+ static PyObject * get_value (PyObject * self , const char * buffer , unsigned * position ,
1434
1436
int type , int max , PyObject * as_class ,
1435
1437
unsigned char tz_aware , unsigned char uuid_subtype ) {
1436
1438
struct module_state * state = GETSTATE (self );
@@ -1455,28 +1457,44 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
1455
1457
case 2 :
1456
1458
case 14 :
1457
1459
{
1458
- int value_length = ((int * )(buffer + * position ))[0 ] - 1 ;
1459
- if (max < value_length ) {
1460
+ unsigned value_length ;
1461
+ if (max < 4 ) {
1462
+ goto invalid ;
1463
+ }
1464
+ memcpy (& value_length , buffer + * position , 4 );
1465
+ /* Encoded string length + string */
1466
+ if (max < 4 + value_length ) {
1460
1467
goto invalid ;
1461
1468
}
1462
1469
* position += 4 ;
1463
- value = PyUnicode_DecodeUTF8 (buffer + * position , value_length , "strict" );
1470
+ /* Strings must end in \0 */
1471
+ if (buffer [* position + value_length - 1 ]) {
1472
+ goto invalid ;
1473
+ }
1474
+ value = PyUnicode_DecodeUTF8 (buffer + * position , value_length - 1 , "strict" );
1464
1475
if (!value ) {
1465
1476
return NULL ;
1466
1477
}
1467
- * position += value_length + 1 ;
1478
+ * position += value_length ;
1468
1479
break ;
1469
1480
}
1470
1481
case 3 :
1471
1482
{
1472
1483
PyObject * collection ;
1473
- int size ;
1484
+ unsigned size ;
1485
+ if (max < 4 ) {
1486
+ goto invalid ;
1487
+ }
1474
1488
memcpy (& size , buffer + * position , 4 );
1475
- if (size < 0 || max < size ) {
1489
+ if (size < BSON_MIN_SIZE || max < size ) {
1490
+ goto invalid ;
1491
+ }
1492
+ /* Check for bad eoo */
1493
+ if (buffer [* position + size - 1 ]) {
1476
1494
goto invalid ;
1477
1495
}
1478
1496
value = elements_to_dict (self , buffer + * position + 4 ,
1479
- size - 5 , as_class , tz_aware , uuid_subtype );
1497
+ ( int ) size - 5 , as_class , tz_aware , uuid_subtype );
1480
1498
if (!value ) {
1481
1499
return NULL ;
1482
1500
}
@@ -1530,14 +1548,20 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
1530
1548
}
1531
1549
case 4 :
1532
1550
{
1533
- int size ,
1534
- end ;
1551
+ unsigned size , end ;
1535
1552
1553
+ if (max < 4 ) {
1554
+ goto invalid ;
1555
+ }
1536
1556
memcpy (& size , buffer + * position , 4 );
1537
1557
if (max < size ) {
1538
1558
goto invalid ;
1539
1559
}
1540
1560
end = * position + size - 1 ;
1561
+ /* Check for bad eoo */
1562
+ if (buffer [end ]) {
1563
+ goto invalid ;
1564
+ }
1541
1565
* position += 4 ;
1542
1566
1543
1567
value = PyList_New (0 );
@@ -1549,14 +1573,19 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
1549
1573
1550
1574
int bson_type = (int )buffer [(* position )++ ];
1551
1575
size_t key_size = strlen (buffer + * position );
1552
- if (key_size > BSON_MAX_SIZE ) {
1576
+ if (max < ( int ) key_size ) {
1553
1577
Py_DECREF (value );
1554
1578
goto invalid ;
1555
1579
}
1556
1580
/* just skip the key, they're in order. */
1557
- * position += (int )key_size + 1 ;
1581
+ * position += (unsigned )key_size + 1 ;
1582
+ if (Py_EnterRecursiveCall (" while decoding a list value" )) {
1583
+ Py_DECREF (value );
1584
+ return NULL ;
1585
+ }
1558
1586
to_append = get_value (self , buffer , position , bson_type ,
1559
1587
max - (int )key_size , as_class , tz_aware , uuid_subtype );
1588
+ Py_LeaveRecursiveCall ();
1560
1589
if (!to_append ) {
1561
1590
Py_DECREF (value );
1562
1591
return NULL ;
@@ -1572,8 +1601,11 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
1572
1601
PyObject * data ;
1573
1602
PyObject * st ;
1574
1603
PyObject * type_to_create ;
1575
- int length , subtype ;
1604
+ unsigned length , subtype ;
1576
1605
1606
+ if (max < 4 ) {
1607
+ goto invalid ;
1608
+ }
1577
1609
memcpy (& length , buffer + * position , 4 );
1578
1610
if (max < length ) {
1579
1611
goto invalid ;
@@ -1779,7 +1811,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
1779
1811
if (!pattern ) {
1780
1812
return NULL ;
1781
1813
}
1782
- * position += (int )pattern_length + 1 ;
1814
+ * position += (unsigned )pattern_length + 1 ;
1783
1815
if ((flags_length = strlen (buffer + * position )) > BSON_MAX_SIZE ) {
1784
1816
Py_DECREF (pattern );
1785
1817
goto invalid ;
@@ -1804,7 +1836,7 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
1804
1836
flags |= 64 ;
1805
1837
}
1806
1838
}
1807
- * position += (int )flags_length + 1 ;
1839
+ * position += (unsigned )flags_length + 1 ;
1808
1840
if ((compile_func = _get_object (state -> RECompile , "re" , "compile" ))) {
1809
1841
value = PyObject_CallFunction (compile_func , "Oi" , pattern , flags );
1810
1842
Py_DECREF (compile_func );
@@ -1814,23 +1846,32 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
1814
1846
}
1815
1847
case 12 :
1816
1848
{
1817
- size_t coll_length ;
1849
+ unsigned coll_length ;
1818
1850
PyObject * collection ;
1819
1851
PyObject * id = NULL ;
1820
1852
PyObject * objectid_type ;
1821
1853
PyObject * dbref_type ;
1822
1854
1855
+ if (max < 4 ) {
1856
+ goto invalid ;
1857
+ }
1858
+ memcpy (& coll_length , buffer + * position , 4 );
1859
+ /* Encoded string length + string + 12 byte ObjectId */
1860
+ if (max < 4 + coll_length + 12 ) {
1861
+ goto invalid ;
1862
+ }
1823
1863
* position += 4 ;
1824
- coll_length = strlen ( buffer + * position );
1825
- if (coll_length > BSON_MAX_SIZE || max < ( int ) coll_length + 12 ) {
1864
+ /* Strings must end in \0 */
1865
+ if (buffer [ * position + coll_length - 1 ] ) {
1826
1866
goto invalid ;
1827
1867
}
1868
+
1828
1869
collection = PyUnicode_DecodeUTF8 (buffer + * position ,
1829
- coll_length , "strict" );
1870
+ coll_length - 1 , "strict" );
1830
1871
if (!collection ) {
1831
1872
return NULL ;
1832
1873
}
1833
- * position += ( int ) coll_length + 1 ;
1874
+ * position += coll_length ;
1834
1875
1835
1876
if ((objectid_type = _get_object (state -> ObjectId , "bson.objectid" , "ObjectId" ))) {
1836
1877
id = PyObject_CallFunction (objectid_type , "s#" , buffer + * position , 12 );
@@ -1853,16 +1894,25 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
1853
1894
{
1854
1895
PyObject * code ;
1855
1896
PyObject * code_type ;
1856
- int value_length = ((int * )(buffer + * position ))[0 ] - 1 ;
1857
- if (max < value_length ) {
1897
+ unsigned value_length ;
1898
+ if (max < 4 ) {
1899
+ goto invalid ;
1900
+ }
1901
+ memcpy (& value_length , buffer + * position , 4 );
1902
+ /* Encoded string length + string */
1903
+ if (max < 4 + value_length ) {
1858
1904
goto invalid ;
1859
1905
}
1860
1906
* position += 4 ;
1861
- code = PyUnicode_DecodeUTF8 (buffer + * position , value_length , "strict" );
1907
+ /* Strings must end in \0 */
1908
+ if (buffer [* position + value_length - 1 ]) {
1909
+ goto invalid ;
1910
+ }
1911
+ code = PyUnicode_DecodeUTF8 (buffer + * position , value_length - 1 , "strict" );
1862
1912
if (!code ) {
1863
1913
return NULL ;
1864
1914
}
1865
- * position += value_length + 1 ;
1915
+ * position += value_length ;
1866
1916
if ((code_type = _get_object (state -> Code , "bson.code" , "Code" ))) {
1867
1917
value = PyObject_CallFunctionObjArgs (code_type , code , NULL , NULL );
1868
1918
Py_DECREF (code_type );
@@ -1872,25 +1922,56 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
1872
1922
}
1873
1923
case 15 :
1874
1924
{
1875
- size_t code_length ;
1876
- int scope_size ;
1925
+ unsigned c_w_s_size ;
1926
+ unsigned code_size ;
1927
+ unsigned scope_size ;
1877
1928
PyObject * code ;
1878
1929
PyObject * scope ;
1879
1930
PyObject * code_type ;
1880
1931
1881
- * position += 8 ;
1882
- code_length = strlen (buffer + * position );
1883
- if (code_length > BSON_MAX_SIZE || max < 8 + (int )code_length ) {
1932
+ if (max < 8 ) {
1933
+ goto invalid ;
1934
+ }
1935
+
1936
+ memcpy (& c_w_s_size , buffer + * position , 4 );
1937
+ * position += 4 ;
1938
+
1939
+ if (max < c_w_s_size ) {
1940
+ goto invalid ;
1941
+ }
1942
+
1943
+ memcpy (& code_size , buffer + * position , 4 );
1944
+ /* code_w_scope length + code length + code + scope length */
1945
+ if (max < 4 + 4 + code_size + 4 ) {
1946
+ goto invalid ;
1947
+ }
1948
+ * position += 4 ;
1949
+ /* Strings must end in \0 */
1950
+ if (buffer [* position + code_size - 1 ]) {
1884
1951
goto invalid ;
1885
1952
}
1886
- code = PyUnicode_DecodeUTF8 (buffer + * position , code_length , "strict" );
1953
+ code = PyUnicode_DecodeUTF8 (buffer + * position , code_size - 1 , "strict" );
1887
1954
if (!code ) {
1888
1955
return NULL ;
1889
1956
}
1890
- * position += ( int ) code_length + 1 ;
1957
+ * position += code_size ;
1891
1958
1892
1959
memcpy (& scope_size , buffer + * position , 4 );
1893
- scope = elements_to_dict (self , buffer + * position + 4 , scope_size - 5 ,
1960
+ if (scope_size < BSON_MIN_SIZE ) {
1961
+ Py_DECREF (code );
1962
+ goto invalid ;
1963
+ }
1964
+ /* code length + code + scope length + scope */
1965
+ if ((4 + code_size + 4 + scope_size ) != c_w_s_size ) {
1966
+ Py_DECREF (code );
1967
+ goto invalid ;
1968
+ }
1969
+
1970
+ /* Check for bad eoo */
1971
+ if (buffer [* position + scope_size - 1 ]) {
1972
+ goto invalid ;
1973
+ }
1974
+ scope = elements_to_dict (self , buffer + * position + 4 , (int )scope_size - 5 ,
1894
1975
(PyObject * )& PyDict_Type , tz_aware , uuid_subtype );
1895
1976
if (!scope ) {
1896
1977
Py_DECREF (code );
@@ -1989,16 +2070,17 @@ static PyObject* get_value(PyObject* self, const char* buffer, int* position,
1989
2070
1990
2071
error = _error ("InvalidBSON" );
1991
2072
if (error ) {
1992
- PyErr_SetNone (error );
2073
+ PyErr_SetString (error ,
2074
+ "invalid length or type code" );
1993
2075
Py_DECREF (error );
1994
2076
}
1995
2077
return NULL ;
1996
2078
}
1997
2079
1998
- static PyObject * elements_to_dict (PyObject * self , const char * string , int max ,
2080
+ static PyObject * _elements_to_dict (PyObject * self , const char * string , int max ,
1999
2081
PyObject * as_class , unsigned char tz_aware ,
2000
2082
unsigned char uuid_subtype ) {
2001
- int position = 0 ;
2083
+ unsigned position = 0 ;
2002
2084
PyObject * dict = PyObject_CallObject (as_class , NULL );
2003
2085
if (!dict ) {
2004
2086
return NULL ;
@@ -2038,6 +2120,18 @@ static PyObject* elements_to_dict(PyObject* self, const char* string, int max,
2038
2120
return dict ;
2039
2121
}
2040
2122
/*
 * Recursion-guarded entry point for decoding a run of BSON elements.
 *
 * Thin wrapper around _elements_to_dict(): every argument (self, string,
 * max, as_class, tz_aware, uuid_subtype) is forwarded unchanged and the
 * inner function's return value is handed back as-is.
 *
 * The inner call is bracketed by Py_EnterRecursiveCall() /
 * Py_LeaveRecursiveCall(). If Py_EnterRecursiveCall() returns non-zero,
 * this function returns NULL immediately without calling
 * _elements_to_dict() and without calling Py_LeaveRecursiveCall().
 * Otherwise Py_LeaveRecursiveCall() always runs after the inner call,
 * whether it produced a result or NULL.
 *
 * NOTE(review): get_value() calls this function for embedded documents and
 * for the scope of code-with-scope values, so this presumably bounds the
 * nesting depth of untrusted input -- confirm that _elements_to_dict()
 * reaches get_value() for each element.
 */
2123
+ static PyObject * elements_to_dict (PyObject * self , const char * string , int max ,
2124
+ PyObject * as_class , unsigned char tz_aware ,
2125
+ unsigned char uuid_subtype ) {
2126
+ PyObject * result ;
2127
+ if (Py_EnterRecursiveCall (" while decoding a BSON document" ))
2128
+ return NULL ;
2129
+ result = _elements_to_dict (self , string , max ,
2130
+ as_class , tz_aware , uuid_subtype );
2131
+ Py_LeaveRecursiveCall ();
2132
+ return result ;
2133
+ }
2134
+
2041
2135
static PyObject * _cbson_bson_to_dict (PyObject * self , PyObject * args ) {
2042
2136
int size ;
2043
2137
Py_ssize_t total_size ;
@@ -2068,7 +2162,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
2068
2162
#else
2069
2163
total_size = PyString_Size (bson );
2070
2164
#endif
2071
- if (total_size < 5 ) {
2165
+ if (total_size < BSON_MIN_SIZE ) {
2072
2166
PyObject * InvalidBSON = _error ("InvalidBSON" );
2073
2167
if (InvalidBSON ) {
2074
2168
PyErr_SetString (InvalidBSON ,
@@ -2088,7 +2182,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
2088
2182
}
2089
2183
2090
2184
memcpy (& size , string , 4 );
2091
- if (size < 0 ) {
2185
+ if (size < BSON_MIN_SIZE ) {
2092
2186
PyObject * InvalidBSON = _error ("InvalidBSON" );
2093
2187
if (InvalidBSON ) {
2094
2188
PyErr_SetString (InvalidBSON , "invalid message size" );
@@ -2097,7 +2191,7 @@ static PyObject* _cbson_bson_to_dict(PyObject* self, PyObject* args) {
2097
2191
return NULL ;
2098
2192
}
2099
2193
2100
- if (total_size < size ) {
2194
+ if (total_size < size || total_size > BSON_MAX_SIZE ) {
2101
2195
PyObject * InvalidBSON = _error ("InvalidBSON" );
2102
2196
if (InvalidBSON ) {
2103
2197
PyErr_SetString (InvalidBSON , "objsize too large" );
@@ -2173,7 +2267,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
2173
2267
return NULL ;
2174
2268
2175
2269
while (total_size > 0 ) {
2176
- if (total_size < 5 ) {
2270
+ if (total_size < BSON_MIN_SIZE ) {
2177
2271
PyObject * InvalidBSON = _error ("InvalidBSON" );
2178
2272
if (InvalidBSON ) {
2179
2273
PyErr_SetString (InvalidBSON ,
@@ -2185,7 +2279,7 @@ static PyObject* _cbson_decode_all(PyObject* self, PyObject* args) {
2185
2279
}
2186
2280
2187
2281
memcpy (& size , string , 4 );
2188
- if (size < 0 ) {
2282
+ if (size < BSON_MIN_SIZE ) {
2189
2283
PyObject * InvalidBSON = _error ("InvalidBSON" );
2190
2284
if (InvalidBSON ) {
2191
2285
PyErr_SetString (InvalidBSON , "invalid message size" );
0 commit comments