Skip to content

Commit e7b5fc9

Browse files
authored
fix: Address queries not fully satisfying requested offset (#18)
1 parent 96fd5b8 commit e7b5fc9

File tree

3 files changed

+127
-1
lines changed

3 files changed

+127
-1
lines changed

google/cloud/datastore/query.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,6 @@ def _process_query_results(self, response_pb):
498498
:raises ValueError: If ``more_results`` is an unexpected value.
499499
"""
500500
self._skipped_results = response_pb.batch.skipped_results
501-
502501
if response_pb.batch.more_results == _NO_MORE_RESULTS:
503502
self.next_page_token = None
504503
else:
@@ -540,6 +539,21 @@ def _next_page(self):
540539
response_pb = self.client._datastore_api.run_query(
541540
self._query.project, partition_id, read_options, query=query_pb
542541
)
542+
543+
while (
544+
response_pb.batch.more_results == _NOT_FINISHED
545+
and response_pb.batch.skipped_results < query_pb.offset
546+
):
547+
# We haven't finished processing. A likely reason is we haven't
548+
# skipped all of the results yet. Don't return any results.
549+
# Instead, rerun query, adjusting offsets. Datastore doesn't process
550+
# more than 1000 skipped results in a query.
551+
query_pb.start_cursor = response_pb.batch.skipped_cursor
552+
query_pb.offset -= response_pb.batch.skipped_results
553+
response_pb = self.client._datastore_api.run_query(
554+
self._query.project, partition_id, read_options, query=query_pb
555+
)
556+
543557
entity_pbs = self._process_query_results(response_pb)
544558
return page_iterator.Page(self, entity_pbs, self.item_to_value)
545559

tests/system/test_system.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import datetime
1616
import os
17+
import string
1718
import unittest
1819

1920
import requests
@@ -465,6 +466,62 @@ def test_query_distinct_on(self):
465466
self.assertEqual(entities[1]["name"], "Arya")
466467

467468

469+
class TestDatastoreQueryOffsets(TestDatastore):
    """System tests for ``Query.fetch`` with ``offset`` on a large result set.

    The fixture holds ``TOTAL_OBJECTS`` entities of kind ``KIND`` in
    namespace ``NAMESPACE``, all with ``family == "Stark"`` and
    ``alive == False``, so the filtered query used by every test matches
    the whole fixture.
    """

    TOTAL_OBJECTS = 2500
    NAMESPACE = "LargeCharacterEntity"
    KIND = "LargeCharacter"

    @classmethod
    def setUpClass(cls):
        """Clone the shared client and make sure the fixture is stored."""
        cls.CLIENT = clone_client(Config.CLIENT)
        # Drop the namespace inherited from the shared client; the queries
        # in this class always pass ``namespace=`` explicitly.
        cls.CLIENT.namespace = None

        # Populate the datastore if necessary.
        populate_datastore.add_large_character_entities(client=cls.CLIENT)

    @classmethod
    def tearDownClass(cls):
        """Remove the stored entities, but only when ``GCD_DATASET`` is set."""
        if os.getenv(GCD_DATASET) is None:
            return
        # In the emulator, destroy the query entities.  Use the client for
        # this test class instead of the global one.
        clear_datastore.remove_all_entities(client=cls.CLIENT)

    def _base_query(self):
        """Return a query over the fixture kind using this class's client."""
        return self.CLIENT.query(kind=self.KIND, namespace=self.NAMESPACE)

    def _verify(self, limit, offset, expected):
        """Run the shared filtered query and check how many entities return.

        :param limit: Value passed to ``fetch(limit=...)``; may be ``None``.
        :param offset: Value passed to ``fetch(offset=...)``; may be ``None``.
        :param expected: Number of entities the iterator must yield.
        """
        page_query = self._base_query()
        for prop, value in (("family", "Stark"), ("alive", False)):
            page_query.add_filter(prop, "=", value)

        entities = list(page_query.fetch(limit=limit, offset=offset))
        self.assertEqual(len(entities), expected)

    def test_query_in_bounds_offsets(self):
        # No limit and no offset: every stored entity is returned.
        self._verify(limit=None, offset=None, expected=self.TOTAL_OBJECTS)

        # Offset only: everything after the skipped entities is returned.
        self._verify(limit=None, offset=900, expected=self.TOTAL_OBJECTS - 900)

        # Offset above 1000 combined with a limit: exactly ``limit``
        # entities are returned.
        self._verify(limit=200, offset=1100, expected=200)

    def test_query_partially_out_of_bounds_offsets(self):
        # Offset leaves only 50 entities, so 50 come back despite the
        # larger limit.
        self._verify(limit=100, offset=self.TOTAL_OBJECTS - 50, expected=50)

    def test_query_out_of_bounds_offsets(self):
        # Offset is past the last entity: nothing is returned.
        self._verify(limit=200, offset=self.TOTAL_OBJECTS + 1000, expected=0)
523+
524+
468525
class TestDatastoreTransaction(TestDatastore):
469526
def test_transaction_via_with_statement(self):
470527
entity = datastore.Entity(key=Config.CLIENT.key("Company", "Google"))

tests/system/utils/populate_datastore.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from __future__ import print_function
1919

2020
import os
21+
import string
2122
import sys
2223
import time
2324
import uuid
@@ -62,6 +63,60 @@ def print_func(message):
6263
print(message)
6364

6465

66+
def add_large_character_entities(client=None):
    """Ensure the large-entity fixture for the query-offset tests exists.

    The fixture is exactly ``TOTAL_OBJECTS`` (2500) entities of kind
    ``LargeCharacter`` in namespace ``LargeCharacterEntity``.  Each entity
    has ``name``, ``family`` and ``alive`` properties plus 26 ``space-<x>``
    string properties of 1500 characters.  If the number of stored
    entities of that kind differs from ``TOTAL_OBJECTS``, they are all
    deleted and the fixture is written again from scratch.

    :type client: :class:`google.cloud.datastore.client.Client`
    :param client: (Optional) Client used for every call.  Its
                   ``namespace`` attribute is overwritten with the fixture
                   namespace.  When omitted, a default
                   ``datastore.Client()`` is created instead of failing on
                   ``None``.
    """
    TOTAL_OBJECTS = 2500
    NAMESPACE = "LargeCharacterEntity"
    KIND = "LargeCharacter"
    MAX_STRING = (string.ascii_lowercase * 58)[:1500]
    # Can only do 500 operations in a transaction, with an overall size
    # limit; the entities are large, so keep each transaction small.
    ENTITIES_TO_BATCH = 25
    # Number of keys handed to a single ``delete_multi`` call.
    KEYS_PER_DELETE = 500

    if client is None:
        # NOTE(review): mirrors the ``client is None`` branch of
        # ``add_characters`` -- confirm both build the client the same way.
        client = datastore.Client()

    client.namespace = NAMESPACE

    def put_objects(count):
        """Write entities ``0 .. count - 1``, one transaction per batch."""
        for start in range(0, count, ENTITIES_TO_BATCH):
            end = min(start + ENTITIES_TO_BATCH, count)
            with client.transaction() as xact:
                for index in range(start, end):
                    # The Cloud Datastore key for the new entity.
                    task_key = client.key(KIND, "character{0:05d}".format(index))

                    # Prepare the new entity.
                    task = datastore.Entity(key=task_key)
                    task["name"] = "{0:05d}".format(index)
                    task["family"] = "Stark"
                    task["alive"] = False

                    # Pad the entity with large string properties.
                    for letter in string.ascii_lowercase:
                        task["space-{}".format(letter)] = MAX_STRING

                    # Save the entity as part of the transaction.
                    xact.put(task)

    # Only the keys are needed to count (and, if required, delete) what is
    # stored, so avoid downloading the large property payloads.
    existing_query = client.query(kind=KIND, namespace=NAMESPACE)
    existing_query.keys_only()
    existing_keys = [entity.key for entity in existing_query.fetch()]

    # Ensure we have exactly TOTAL_OBJECTS entities for the tests.  If not,
    # clean up the kind and add TOTAL_OBJECTS new entities.
    if len(existing_keys) != TOTAL_OBJECTS:
        for start in range(0, len(existing_keys), KEYS_PER_DELETE):
            client.delete_multi(existing_keys[start:start + KEYS_PER_DELETE])
        put_objects(TOTAL_OBJECTS)
118+
119+
65120
def add_characters(client=None):
66121
if client is None:
67122
# Get a client that uses the test dataset.

0 commit comments

Comments
 (0)