Skip to content

Commit 93e61c3

Browse files
author
Mike Dirolf
committed
add InvalidStringData exception and speed up check for c extension string encoding
1 parent 721a763 commit 93e61c3

File tree

4 files changed

+32
-14
lines changed

4 files changed

+32
-14
lines changed

pymongo/_cbsonmodule.c

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
static PyObject* CBSONError;
2828
static PyObject* InvalidName;
2929
static PyObject* InvalidDocument;
30+
static PyObject* InvalidStringData;
3031
static PyObject* SON;
3132
static PyObject* Binary;
3233
static PyObject* Code;
@@ -153,7 +154,7 @@ static int write_string(bson_buffer* buffer, PyObject* py_string) {
153154

154155
for (i = 0; i < string_length - 1; i++) {
155156
if (string[i] == 0) {
156-
PyErr_SetString(InvalidDocument, "BSON strings must not contain a NULL character");
157+
PyErr_SetString(InvalidStringData, "BSON strings must not contain a NULL character");
157158
return 0;
158159
}
159160
}
@@ -167,6 +168,17 @@ static int write_string(bson_buffer* buffer, PyObject* py_string) {
167168
return 1;
168169
}
169170

171+
/*
 * Check that the first `length` bytes of `data` are 7-bit ASCII.
 *
 * Returns 1 when no scanned byte has its high bit (0x80) set, 0 otherwise.
 * A NULL `data` is reported as invalid unless there is nothing to scan
 * (length <= 0), so a NULL pointer is never dereferenced.
 */
static int validate_ascii(const char* data, int length) {
    int i;

    if (!data) {
        return length <= 0;
    }
    for (i = 0; i < length; i++) {
        /* plain char may be signed; test the high bit on an unsigned byte */
        if ((unsigned char)data[i] & 0x80) {
            return 0;
        }
    }
    return 1;
}
181+
170182
/* TODO our platform better be little-endian w/ 4-byte ints! */
171183
/* returns 0 on failure */
172184
static int write_element_to_buffer(bson_buffer* buffer, int type_byte, PyObject* value, unsigned char check_keys) {
@@ -330,17 +342,14 @@ static int write_element_to_buffer(bson_buffer* buffer, int type_byte, PyObject*
330342
memcpy(buffer->buffer + length_location, &length, 4);
331343
return 1;
332344
} else if (PyString_Check(value)) {
333-
PyObject* encoded;
334345
int result;
335346

336347
*(buffer->buffer + type_byte) = 0x02;
337-
/* we have to do the encoding so we can fail fast if they give us non utf-8 */
338-
encoded = PyString_AsEncodedObject(value, "utf-8", "strict");
339-
if (!encoded) {
348+
if (!validate_ascii(PyString_AsString(value), PyString_Size(value))) {
349+
PyErr_SetString(InvalidStringData, "strings in documents must be ASCII only");
340350
return 0;
341351
}
342-
result = write_string(buffer, encoded);
343-
Py_DECREF(encoded);
352+
result = write_string(buffer, value);
344353
return result;
345354
} else if (PyUnicode_Check(value)) {
346355
PyObject* encoded;
@@ -1180,6 +1189,7 @@ PyMODINIT_FUNC init_cbson(void) {
11801189
CBSONError = PyObject_GetAttrString(module, "InvalidDocument");
11811190
InvalidName = PyObject_GetAttrString(module, "InvalidName");
11821191
InvalidDocument = PyObject_GetAttrString(module, "InvalidDocument");
1192+
InvalidStringData = PyObject_GetAttrString(module, "InvalidStringData");
11831193
Py_DECREF(module);
11841194

11851195
module = PyImport_ImportModule("pymongo.son");

pymongo/bson.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828
from objectid import ObjectId
2929
from dbref import DBRef
3030
from son import SON
31-
from errors import InvalidBSON, InvalidDocument, UnsupportedTag, InvalidName
31+
from errors import InvalidBSON, InvalidDocument, UnsupportedTag
32+
from errors import InvalidName, InvalidStringData
3233

3334
try:
3435
import _cbson
@@ -57,8 +58,11 @@ def _get_c_string(data):
5758

5859
def _make_c_string(string):
    """Return `string` encoded as UTF-8 with a trailing NULL byte appended.

    Raises InvalidStringData if `string` contains a NULL character or if
    encoding it to UTF-8 fails with a Unicode error.
    """
    if "\x00" in string:
        raise InvalidStringData("BSON strings must not contain a NULL character")
    try:
        return string.encode("utf-8") + "\x00"
    except UnicodeError:
        # Only encoding/decoding failures mean bad string data; anything else
        # (KeyboardInterrupt, programming errors, ...) must propagate untouched.
        raise InvalidStringData("strings in documents must be ASCII only")
6266

6367

6468
def _validate_number(data):

pymongo/errors.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ class InvalidBSON(ValueError):
5555
"""Raised when trying to create a BSON object from invalid data.
5656
"""
5757

58+
class InvalidStringData(ValueError):
    """Raised when trying to encode a string containing a NULL character
    or non-ASCII data.
    """
61+
5862

5963
class InvalidDocument(ValueError):
6064
"""Raised when trying to create a BSON object from an invalid document.

test/test_bson.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from pymongo.dbref import DBRef
3131
from pymongo.son import SON
3232
from pymongo.bson import BSON, is_valid, _to_dicts
33-
from pymongo.errors import UnsupportedTag, InvalidDocument
33+
from pymongo.errors import UnsupportedTag, InvalidDocument, InvalidStringData
3434

3535

3636
class TestBSON(unittest.TestCase):
@@ -132,8 +132,8 @@ def test_basic_from_dict(self):
132132
"\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x00\x00")
133133

134134
def test_null_character_encoding(self):
135-
self.assertRaises(InvalidDocument, BSON.from_dict, {"with zero": "hello\x00world"})
136-
self.assertRaises(InvalidDocument, BSON.from_dict, {"with zero": u"hello\x00world"})
135+
self.assertRaises(InvalidStringData, BSON.from_dict, {"with zero": "hello\x00world"})
136+
self.assertRaises(InvalidStringData, BSON.from_dict, {"with zero": u"hello\x00world"})
137137

138138
def test_from_then_to_dict(self):
139139

@@ -199,7 +199,7 @@ def test_data_files(self):
199199
f.close()
200200

201201
def test_bad_encode(self):
202-
self.assertRaises(UnicodeDecodeError, BSON.from_dict,
202+
self.assertRaises(InvalidStringData, BSON.from_dict,
203203
{"lalala": '\xf4\xe0\xf0\xe1\xc0 Color Touch'})
204204

205205
def test_overflow(self):

0 commit comments

Comments
 (0)