@@ -118,10 +118,12 @@ _downcast_and_check(Py_ssize_t size, int extra) {
118118 */
119119int convert_codec_options (PyObject * options_obj , void * p ) {
120120 codec_options_t * options = (codec_options_t * )p ;
121- if (!PyArg_ParseTuple (options_obj , "Obb" ,
121+ options -> unicode_decode_error_handler = NULL ;
122+ if (!PyArg_ParseTuple (options_obj , "Obbz" ,
122123 & options -> document_class ,
123124 & options -> tz_aware ,
124- & options -> uuid_rep )) {
125+ & options -> uuid_rep ,
126+ & options -> unicode_decode_error_handler )) {
125127 return 0 ;
126128 }
127129
@@ -137,6 +139,7 @@ void default_codec_options(codec_options_t* options) {
137139 // TODO: set to "1". PYTHON-526, setting tz_aware=True by default.
138140 options -> tz_aware = 0 ;
139141 options -> uuid_rep = PYTHON_LEGACY ;
142+ options -> unicode_decode_error_handler = NULL ;
140143}
141144
142145void destroy_codec_options (codec_options_t * options ) {
@@ -1560,7 +1563,9 @@ static PyObject* get_value(PyObject* self, const char* buffer,
15601563 if (buffer [* position + value_length - 1 ]) {
15611564 goto invalid ;
15621565 }
1563- value = PyUnicode_DecodeUTF8 (buffer + * position , value_length - 1 , "strict" );
1566+ value = PyUnicode_DecodeUTF8 (
1567+ buffer + * position , value_length - 1 ,
1568+ options -> unicode_decode_error_handler );
15641569 if (!value ) {
15651570 goto invalid ;
15661571 }
@@ -1916,7 +1921,9 @@ static PyObject* get_value(PyObject* self, const char* buffer,
19161921 if (pattern_length > BSON_MAX_SIZE || max < pattern_length ) {
19171922 goto invalid ;
19181923 }
1919- pattern = PyUnicode_DecodeUTF8 (buffer + * position , pattern_length , "strict" );
1924+ pattern = PyUnicode_DecodeUTF8 (
1925+ buffer + * position , pattern_length ,
1926+ options -> unicode_decode_error_handler );
19201927 if (!pattern ) {
19211928 goto invalid ;
19221929 }
@@ -1980,8 +1987,9 @@ static PyObject* get_value(PyObject* self, const char* buffer,
19801987 goto invalid ;
19811988 }
19821989
1983- collection = PyUnicode_DecodeUTF8 (buffer + * position ,
1984- coll_length - 1 , "strict" );
1990+ collection = PyUnicode_DecodeUTF8 (
1991+ buffer + * position , coll_length - 1 ,
1992+ options -> unicode_decode_error_handler );
19851993 if (!collection ) {
19861994 goto invalid ;
19871995 }
@@ -2026,7 +2034,9 @@ static PyObject* get_value(PyObject* self, const char* buffer,
20262034 if (buffer [* position + value_length - 1 ]) {
20272035 goto invalid ;
20282036 }
2029- code = PyUnicode_DecodeUTF8 (buffer + * position , value_length - 1 , "strict" );
2037+ code = PyUnicode_DecodeUTF8 (
2038+ buffer + * position , value_length - 1 ,
2039+ options -> unicode_decode_error_handler );
20302040 if (!code ) {
20312041 goto invalid ;
20322042 }
@@ -2068,7 +2078,9 @@ static PyObject* get_value(PyObject* self, const char* buffer,
20682078 if (buffer [* position + code_size - 1 ]) {
20692079 goto invalid ;
20702080 }
2071- code = PyUnicode_DecodeUTF8 (buffer + * position , code_size - 1 , "strict" );
2081+ code = PyUnicode_DecodeUTF8 (
2082+ buffer + * position , code_size - 1 ,
2083+ options -> unicode_decode_error_handler );
20722084 if (!code ) {
20732085 goto invalid ;
20742086 }
@@ -2261,8 +2273,29 @@ static PyObject* _elements_to_dict(PyObject* self, const char* string,
22612273 Py_DECREF (dict );
22622274 return NULL ;
22632275 }
2264- name = PyUnicode_DecodeUTF8 (string + position , name_length , "strict" );
2276+ name = PyUnicode_DecodeUTF8 (
2277+ string + position , name_length ,
2278+ options -> unicode_decode_error_handler );
22652279 if (!name ) {
2280+ /* If NULL is returned then wrap the UnicodeDecodeError
2281+ in an InvalidBSON error */
2282+ PyObject * etype , * evalue , * etrace ;
2283+ PyObject * InvalidBSON ;
2284+
2285+ PyErr_Fetch (& etype , & evalue , & etrace );
2286+ InvalidBSON = _error ("InvalidBSON" );
2287+ if (InvalidBSON ) {
2288+ Py_DECREF (etype );
2289+ etype = InvalidBSON ;
2290+
2291+ if (evalue ) {
2292+ PyObject * msg = PyObject_Str (evalue );
2293+ Py_DECREF (evalue );
2294+ evalue = msg ;
2295+ }
2296+ PyErr_NormalizeException (& etype , & evalue , & etrace );
2297+ }
2298+ PyErr_Restore (etype , evalue , etrace );
22662299 Py_DECREF (dict );
22672300 return NULL ;
22682301 }
0 commit comments