Skip to content

Commit 2613955

Browse files
committed
parallel_collection_scan -> parallel_scan PYTHON-633
Spec change. Also added better documentation with a very basic example.
1 parent 0b831cb commit 2613955

File tree

4 files changed

+32
-9
lines changed

4 files changed

+32
-9
lines changed

doc/api/pymongo/collection.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
.. automethod:: drop
3838
.. automethod:: find([spec=None[, fields=None[, skip=0[, limit=0[, timeout=True[, snapshot=False[, tailable=False[, sort=None[, max_scan=None[, as_class=None[, slave_okay=False[, await_data=False[, partial=False[, manipulate=True[, read_preference=ReadPreference.PRIMARY[, exhaust=False[, compile_re=True[, **kwargs]]]]]]]]]]]]]]]]]])
3939
.. automethod:: find_one([spec_or_id=None[, *args[, **kwargs]]])
40-
.. automethod:: parallel_collection_scan
40+
.. automethod:: parallel_scan
4141
.. automethod:: count
4242
.. automethod:: create_index
4343
.. automethod:: ensure_index

doc/changelog.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ fixes. Highlights include:
1313
:meth:`~pymongo.cursor.Cursor.max_time_ms`.
1414
- Support for writing :meth:`~pymongo.collection.Collection.aggregate`
1515
output to a collection.
16-
- A new :meth:`~pymongo.collection.Collection.parallel_collection_scan` helper.
16+
- A new :meth:`~pymongo.collection.Collection.parallel_scan` helper.
1717
- :class:`~pymongo.errors.OperationFailure` and its subclasses now include
1818
a :attr:`~pymongo.errors.OperationFailure.details` attribute with complete
1919
error details from the server.

pymongo/collection.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -859,12 +859,35 @@ def find(self, *args, **kwargs):
859859
self.secondary_acceptable_latency_ms)
860860
return Cursor(self, *args, **kwargs)
861861

862-
def parallel_collection_scan(self, num_cursors, **kwargs):
863-
"""Scan this collection in parallel.
862+
def parallel_scan(self, num_cursors, **kwargs):
863+
"""Scan this entire collection in parallel.
864864
865-
Returns a list of :class:`~pymongo.command_cursor.CommandCursor`
866-
instances that can be iterated concurrently by one or more threads
867-
or greenlets.
865+
Returns a list of up to ``num_cursors`` cursors that can be iterated
866+
concurrently. As long as the collection is not modified during
867+
scanning, each document appears once in one of the cursors' result
868+
sets.
869+
870+
For example, to process each document in a collection using some
871+
thread-safe ``process_document()`` function::
872+
873+
def process_cursor(cursor):
874+
for document in cursor:
875+
# Some thread-safe processing function:
876+
process_document(document)
877+
878+
# Get up to 4 cursors.
879+
cursors = collection.parallel_scan(4)
880+
threads = [
881+
threading.Thread(target=process_cursor, args=(cursor,))
882+
for cursor in cursors]
883+
884+
for thread in threads:
885+
thread.start()
886+
887+
for thread in threads:
888+
thread.join()
889+
890+
# All documents have now been processed.
868891
869892
With :class:`~pymongo.mongo_replica_set_client.MongoReplicaSetClient`
870893
or :class:`~pymongo.master_slave_connection.MasterSlaveConnection`,

test/test_collection.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,7 +1381,7 @@ def test_aggregation_cursor(self):
13811381
expected_sum,
13821382
sum(doc['_id'] for doc in cursor))
13831383

1384-
def test_parallel_collection_scan(self):
1384+
def test_parallel_scan(self):
13851385
if not version.at_least(self.db.connection, (2, 5, 5)):
13861386
raise SkipTest("Requires MongoDB >= 2.5.5")
13871387
db = self.db
@@ -1396,7 +1396,7 @@ def test_parallel_collection_scan(self):
13961396
coll.insert(({'_id': i} for i in xrange(8000)), w=self.w)
13971397
docs = []
13981398
threads = [threading.Thread(target=docs.extend, args=(cursor,))
1399-
for cursor in coll.parallel_collection_scan(3)]
1399+
for cursor in coll.parallel_scan(3)]
14001400
for t in threads:
14011401
t.start()
14021402
for t in threads:

0 commit comments

Comments
 (0)