Skip to content

Commit 45b9b78

Browse files
author
Mike Dirolf
committed
shuffle _id to front in encoder rather than as a SONManipulator. also clean up some of the module reloaded tests in the C extension
1 parent 65a41da commit 45b9b78

File tree

4 files changed

+93
-95
lines changed

4 files changed

+93
-95
lines changed

pymongo/_cbsonmodule.c

Lines changed: 86 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,10 @@ static int validate_ascii(const char* data, int length) {
348348
}
349349

350350
/* TODO our platform better be little-endian w/ 4-byte ints! */
351-
/* returns 0 on failure */
351+
/* Write a single value to the buffer (also write it's type_byte, for which
352+
* space has already been reserved.
353+
*
354+
* returns 0 on failure */
352355
static int write_element_to_buffer(bson_buffer* buffer, int type_byte, PyObject* value, unsigned char check_keys) {
353356
/* TODO this isn't quite the same as the Python version:
354357
* here we check for type equivalence, not isinstance in some
@@ -771,13 +774,23 @@ static int write_element_to_buffer(bson_buffer* buffer, int type_byte, PyObject*
771774
DBRef = PyObject_GetAttrString(module, "DBRef");
772775
Py_DECREF(module);
773776

777+
module = PyImport_ImportModule("uuid");
778+
if (!module) {
779+
UUID = NULL;
780+
PyErr_Clear();
781+
} else {
782+
UUID = PyObject_GetAttrString(module, "UUID");
783+
Py_DECREF(module);
784+
}
785+
774786
if (PyObject_IsInstance(value, Binary) ||
775787
PyObject_IsInstance(value, Code) ||
776788
PyObject_IsInstance(value, ObjectId) ||
777-
PyObject_IsInstance(value, DBRef)) {
789+
PyObject_IsInstance(value, DBRef) ||
790+
(UUID && PyObject_IsInstance(value, UUID))) {
778791

779792
PyErr_SetString(PyExc_RuntimeError,
780-
"A pymongo module was reloaded without the C extension being reloaded.\n"
793+
"A python module was reloaded without the C extension being reloaded.\n"
781794
"\n"
782795
"See http://www.mongodb.org/display/DOCS/PyMongo+and+mod_wsgi for"
783796
"a possible explanation / fix.");
@@ -794,25 +807,51 @@ static int write_element_to_buffer(bson_buffer* buffer, int type_byte, PyObject*
794807
}
795808
}
796809

797-
static int check_key_name(const unsigned char check_keys,
798-
const char* name,
810+
static int check_key_name(const char* name,
799811
const Py_ssize_t name_length) {
800-
if (check_keys) {
801-
int i;
802-
if (name_length > 0 && name[0] == '$') {
803-
PyObject* errmsg = PyString_FromFormat("key '%s' must not start with '$'", name);
812+
int i;
813+
if (name_length > 0 && name[0] == '$') {
814+
PyObject* errmsg = PyString_FromFormat("key '%s' must not start with '$'", name);
815+
PyErr_SetString(InvalidName, PyString_AsString(errmsg));
816+
Py_DECREF(errmsg);
817+
return 0;
818+
}
819+
for (i = 0; i < name_length; i++) {
820+
if (name[i] == '.') {
821+
PyObject* errmsg = PyString_FromFormat("key '%s' must not contain '.'", name);
804822
PyErr_SetString(InvalidName, PyString_AsString(errmsg));
805823
Py_DECREF(errmsg);
806824
return 0;
807825
}
808-
for (i = 0; i < name_length; i++) {
809-
if (name[i] == '.') {
810-
PyObject* errmsg = PyString_FromFormat("key '%s' must not contain '.'", name);
811-
PyErr_SetString(InvalidName, PyString_AsString(errmsg));
812-
Py_DECREF(errmsg);
813-
return 0;
814-
}
815-
}
826+
}
827+
return 1;
828+
}
829+
830+
/* Write a (key, value) pair to the buffer.
831+
*
832+
* Returns 0 on failure */
833+
static int write_pair(bson_buffer* buffer, const char* name, Py_ssize_t name_length, PyObject* value, unsigned char check_keys, unsigned char allow_id) {
834+
int type_byte;
835+
836+
/* Don't write any _id elements unless we're explicitly told to -
837+
* _id has to be written first so we write do so, but don't bother
838+
* deleting it from the dictionary being written. */
839+
if (!allow_id && strcmp(name, "_id") == 0) {
840+
return 1;
841+
}
842+
843+
type_byte = buffer_save_bytes(buffer, 1);
844+
if (type_byte == -1) {
845+
return 0;
846+
}
847+
if (check_keys && !check_key_name(name, name_length)) {
848+
return 0;
849+
}
850+
if (!buffer_write_bytes(buffer, name, name_length + 1)) {
851+
return 0;
852+
}
853+
if (!write_element_to_buffer(buffer, type_byte, value, check_keys)) {
854+
return 0;
816855
}
817856
return 1;
818857
}
@@ -830,8 +869,6 @@ static int write_son(bson_buffer* buffer, PyObject* dict, int start_position,
830869
PyObject* key;
831870
PyObject* value;
832871
PyObject* encoded;
833-
int type_byte;
834-
Py_ssize_t name_length;
835872

836873
key = PyList_GetItem(keys, i);
837874
if (!key) {
@@ -843,11 +880,6 @@ static int write_son(bson_buffer* buffer, PyObject* dict, int start_position,
843880
Py_DECREF(keys);
844881
return 0;
845882
}
846-
type_byte = buffer_save_bytes(buffer, 1);
847-
if (type_byte == -1) {
848-
Py_DECREF(keys);
849-
return 0;
850-
}
851883
if (PyUnicode_CheckExact(key)) {
852884
encoded = PyUnicode_AsUTF8String(key);
853885
if (!encoded) {
@@ -858,28 +890,11 @@ static int write_son(bson_buffer* buffer, PyObject* dict, int start_position,
858890
encoded = key;
859891
Py_INCREF(encoded);
860892
}
861-
name_length = PyString_Size(encoded);
862-
{
863-
const char* name = PyString_AsString(encoded);
864-
if (!name) {
865-
Py_DECREF(keys);
866-
Py_DECREF(encoded);
867-
return 0;
868-
}
869-
if (!check_key_name(check_keys, name, name_length)) {
870-
Py_DECREF(keys);
871-
Py_DECREF(encoded);
872-
return 0;
873-
}
874-
if (!buffer_write_bytes(buffer, name, name_length + 1)) {
875-
Py_DECREF(keys);
876-
Py_DECREF(encoded);
877-
return 0;
878-
}
879-
}
880-
Py_DECREF(encoded);
881-
if (!write_element_to_buffer(buffer, type_byte, value, check_keys)) {
893+
/* Don't allow writing _id here - it was written above. */
894+
if (!write_pair(buffer, PyString_AsString(encoded),
895+
PyString_Size(encoded), value, check_keys, 0)) {
882896
Py_DECREF(keys);
897+
Py_DECREF(encoded);
883898
return 0;
884899
}
885900
}
@@ -893,28 +908,37 @@ static int write_dict(bson_buffer* buffer, PyObject* dict, unsigned char check_k
893908
char zero = 0;
894909
int length;
895910

911+
int is_dict = PyDict_Check(dict);
912+
896913
/* save space for length */
897914
int length_location = buffer_save_bytes(buffer, 4);
898915
if (length_location == -1) {
899916
return 0;
900917
}
901918

919+
/* Write _id first if we have one, whether or not we're SON. */
920+
if (is_dict) {
921+
PyObject* _id = PyDict_GetItemString(dict, "_id");
922+
if (_id) {
923+
/* Don't bother checking keys, but do make sure we're allowed to
924+
* write _id */
925+
if (!write_pair(buffer, "_id", 3, _id, 0, 1)) {
926+
return 0;
927+
}
928+
}
929+
}
930+
902931
if (PyObject_IsInstance(dict, SON)) {
903932
if (!write_son(buffer, dict, start_position, length_location, check_keys)) {
904933
return 0;
905934
}
906-
} else if (PyDict_Check(dict)) {
935+
} else if (is_dict) {
907936
PyObject* key;
908937
PyObject* value;
909938
Py_ssize_t pos = 0;
939+
910940
while (PyDict_Next(dict, &pos, &key, &value)) {
911941
PyObject* encoded;
912-
Py_ssize_t name_length;
913-
914-
int type_byte = buffer_save_bytes(buffer, 1);
915-
if (type_byte == -1) {
916-
return 0;
917-
}
918942
if (PyUnicode_CheckExact(key)) {
919943
encoded = PyUnicode_AsUTF8String(key);
920944
if (!encoded) {
@@ -924,51 +948,20 @@ static int write_dict(bson_buffer* buffer, PyObject* dict, unsigned char check_k
924948
encoded = key;
925949
Py_INCREF(encoded);
926950
}
927-
name_length = PyString_Size(encoded);
928-
{
929-
const char* name = PyString_AsString(encoded);
930-
if (!name) {
931-
Py_DECREF(encoded);
932-
return 0;
933-
}
934-
if (!check_key_name(check_keys, name, name_length)) {
935-
Py_DECREF(encoded);
936-
return 0;
937-
}
938-
if (!buffer_write_bytes(buffer, name, name_length + 1)) {
939-
Py_DECREF(encoded);
940-
return 0;
941-
}
942-
}
943-
Py_DECREF(encoded);
944-
if (!write_element_to_buffer(buffer, type_byte, value, check_keys)) {
951+
/* Don't allow writing _id here - it was written above. */
952+
if (!write_pair(buffer, PyString_AsString(encoded),
953+
PyString_Size(encoded), value, check_keys, 0)) {
954+
Py_DECREF(encoded);
945955
return 0;
946956
}
947957
}
948958
} else {
949-
/* Try getting the SON class again
950-
* This can be necessary if pymongo.son has been reloaded, since
951-
* reloading the module breaks IsInstance.
952-
* The main reason we even try this is to raise an informative exception
953-
* about it. */
954-
PyObject* son_module = PyImport_ImportModule("pymongo.son");
955-
SON = PyObject_GetAttrString(son_module, "SON");
956-
Py_DECREF(son_module);
957-
if (PyObject_IsInstance(dict, SON)) {
958-
PyErr_SetString(PyExc_RuntimeError,
959-
"pymongo.son was reloaded without the C extension being reloaded.\n"
960-
"\n"
961-
"See http://www.mongodb.org/display/DOCS/PyMongo+and+mod_wsgi for"
962-
"a possible explanation / fix.");
963-
return 0;
964-
} else {
965-
PyObject* errmsg = PyString_FromString("encoder expected a mapping type but got: ");
966-
PyObject* repr = PyObject_Repr(dict);
967-
PyString_ConcatAndDel(&errmsg, repr);
968-
PyErr_SetString(PyExc_TypeError, PyString_AsString(errmsg));
969-
Py_DECREF(errmsg);
970-
return 0;
971-
}
959+
PyObject* errmsg = PyString_FromString("encoder expected a mapping type but got: ");
960+
PyObject* repr = PyObject_Repr(dict);
961+
PyString_ConcatAndDel(&errmsg, repr);
962+
PyErr_SetString(PyExc_TypeError, PyString_AsString(errmsg));
963+
Py_DECREF(errmsg);
964+
return 0;
972965
}
973966

974967
/* write null byte and fill in length */

pymongo/bson.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -464,8 +464,11 @@ def _element_to_bson(key, value, check_keys):
464464
def _dict_to_bson(dict, check_keys):
465465
try:
466466
elements = ""
467+
if "_id" in dict:
468+
elements += _element_to_bson("_id", dict["_id"], False)
467469
for (key, value) in dict.iteritems():
468-
elements += _element_to_bson(key, value, check_keys)
470+
if key != "_id":
471+
elements += _element_to_bson(key, value, check_keys)
469472
except AttributeError:
470473
raise TypeError("encoder expected a mapping type but got: %r" % dict)
471474

pymongo/database.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ def __init__(self, connection, name):
5757
self.__outgoing_manipulators = []
5858
self.__outgoing_copying_manipulators = []
5959
self.add_son_manipulator(ObjectIdInjector())
60-
self.add_son_manipulator(ObjectIdShuffler())
6160

6261
def __check_name(self, name):
6362
for invalid_char in [" ", ".", "$", "/", "\\"]:

pymongo/son_manipulator.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ def transform_incoming(self, son, collection):
7676
return son
7777

7878

79+
# This is now handled during BSON encoding (for performance reasons),
80+
# but I'm keeping this here as a reference for those implementing new
81+
# SONManipulators.
7982
class ObjectIdShuffler(SONManipulator):
8083
"""A son manipulator that moves _id to the first position.
8184
"""

0 commit comments

Comments
 (0)