Skip to content

Commit c9affc0

Browse files
committed
Use StringIO for pure python legacy batch inserts.
This reduces memory fragmentation when encoding large batches of documents for insert. Testing shows lower memory usage in pure Python and reduced GC overhead in Jython, with no reduction in performance.
1 parent 7642ea2 commit c9affc0

File tree

1 file changed

+11
-9
lines changed

1 file changed

+11
-9
lines changed

pymongo/message.py

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -219,10 +219,10 @@ def _insert_message(insert_message, send_safe):
219219

220220
send_safe = safe or not continue_on_error
221221
last_error = None
222-
begin = struct.pack("<i", int(continue_on_error))
223-
begin += bson._make_c_string(collection_name)
224-
message_length = len(begin)
225-
data = [begin]
222+
data = StringIO()
223+
data.write(struct.pack("<i", int(continue_on_error)))
224+
data.write(bson._make_c_string(collection_name))
225+
message_length = begin_loc = data.tell()
226226
has_docs = False
227227
for doc in docs:
228228
encoded = bson.BSON.encode(doc, check_keys, uuid_subtype)
@@ -231,14 +231,14 @@ def _insert_message(insert_message, send_safe):
231231

232232
message_length += encoded_length
233233
if message_length < client.max_message_size and not too_large:
234-
data.append(encoded)
234+
data.write(encoded)
235235
has_docs = True
236236
continue
237237

238238
if has_docs:
239239
# We have enough data, send this message.
240240
try:
241-
client._send_message(_insert_message(_EMPTY.join(data),
241+
client._send_message(_insert_message(data.getvalue(),
242242
send_safe), send_safe)
243243
# Exception type could be OperationFailure or a subtype
244244
# (e.g. DuplicateKeyError)
@@ -261,13 +261,15 @@ def _insert_message(insert_message, send_safe):
261261
" bytes." %
262262
(encoded_length, client.max_bson_size))
263263

264-
message_length = len(begin) + encoded_length
265-
data = [begin, encoded]
264+
message_length = begin_loc + encoded_length
265+
data.seek(begin_loc)
266+
data.truncate()
267+
data.write(encoded)
266268

267269
if not has_docs:
268270
raise InvalidOperation("cannot do an empty bulk insert")
269271

270-
client._send_message(_insert_message(_EMPTY.join(data), safe), safe)
272+
client._send_message(_insert_message(data.getvalue(), safe), safe)
271273

272274
# Re-raise any exception stored due to continue_on_error
273275
if last_error is not None:

0 commit comments

Comments
 (0)