/* File-scope references to Python objects used by this extension.
 * init_cbson assigns CBSONError, InvalidName, InvalidDocument and
 * InvalidStringData from module attributes via PyObject_GetAttrString.
 * Where SON, Binary and Code are assigned is not visible from here.
 * NOTE(review): init_cbson looks CBSONError up under the attribute name
 * "InvalidDocument" -- confirm that is intended. */
2727static PyObject * CBSONError ;
2828static PyObject * InvalidName ;
2929static PyObject * InvalidDocument ;
30+ static PyObject * InvalidStringData ;
3031static PyObject * SON ;
3132static PyObject * Binary ;
3233static PyObject * Code ;
@@ -153,7 +154,7 @@ static int write_string(bson_buffer* buffer, PyObject* py_string) {
153154
154155 for (i = 0 ; i < string_length - 1 ; i ++ ) {
155156 if (string [i ] == 0 ) {
156- PyErr_SetString (InvalidDocument , "BSON strings must not contain a NULL character" );
157+ PyErr_SetString (InvalidStringData , "BSON strings must not contain a NULL character" );
157158 return 0 ;
158159 }
159160 }
@@ -167,6 +168,17 @@ static int write_string(bson_buffer* buffer, PyObject* py_string) {
167168 return 1 ;
168169}
169170
/* Check that a byte range contains only 7-bit ASCII.
 *
 * data:   first byte to inspect (the range need not be NUL-terminated)
 * length: number of bytes to inspect
 *
 * Returns 0 as soon as a byte with its high bit (0x80) set is seen,
 * 1 otherwise -- including when length is zero or negative, in which
 * case no byte is read. */
static int validate_ascii(const char* data, int length) {
    const char* cursor = data;
    int remaining;

    for (remaining = length; remaining > 0; remaining--, cursor++) {
        if ((*cursor & 0x80) != 0) {
            return 0;
        }
    }
    return 1;
}
181+
170182/* TODO our platform better be little-endian w/ 4-byte ints! */
171183/* returns 0 on failure */
172184static int write_element_to_buffer (bson_buffer * buffer , int type_byte , PyObject * value , unsigned char check_keys ) {
@@ -330,17 +342,14 @@ static int write_element_to_buffer(bson_buffer* buffer, int type_byte, PyObject*
330342 memcpy (buffer -> buffer + length_location , & length , 4 );
331343 return 1 ;
332344 } else if (PyString_Check (value )) {
333- PyObject * encoded ;
334345 int result ;
335346
336347 * (buffer -> buffer + type_byte ) = 0x02 ;
337- /* we have to do the encoding so we can fail fast if they give us non utf-8 */
338- encoded = PyString_AsEncodedObject (value , "utf-8" , "strict" );
339- if (!encoded ) {
348+ if (!validate_ascii (PyString_AsString (value ), PyString_Size (value ))) {
349+ PyErr_SetString (InvalidStringData , "strings in documents must be ASCII only" );
340350 return 0 ;
341351 }
342- result = write_string (buffer , encoded );
343- Py_DECREF (encoded );
352+ result = write_string (buffer , value );
344353 return result ;
345354 } else if (PyUnicode_Check (value )) {
346355 PyObject * encoded ;
@@ -1180,6 +1189,7 @@ PyMODINIT_FUNC init_cbson(void) {
11801189 CBSONError = PyObject_GetAttrString (module , "InvalidDocument" );
11811190 InvalidName = PyObject_GetAttrString (module , "InvalidName" );
11821191 InvalidDocument = PyObject_GetAttrString (module , "InvalidDocument" );
1192+ InvalidStringData = PyObject_GetAttrString (module , "InvalidStringData" );
11831193 Py_DECREF (module );
11841194
11851195 module = PyImport_ImportModule ("pymongo.son" );
0 commit comments