Skip to content

Commit ec266d3

Browse files
committed
fix: Query options were not respecting use_cache
In certain circumstances, we were not respecting use_cache for queries, unlike legacy NDB, which is quite emphatic about supporting them. (See https://github.com/GoogleCloudPlatform/datastore-ndb-python/blob/59cb209ed95480025d26531fc91397575438d2fe/ndb/query.py#L186-L187) In #613 we tried to match legacy NDB behavior by updating the cache using the results of queries. We still do that, but now we respect use_cache, which was a valid keyword argument for Query.fetch() and friends, but was not passed down to the context cache when needed. As a result, the cache could mysteriously accumulate lots of memory usage and perhaps even cause you to hit memory limits, even if it was seemingly disabled and it didn't look like there were any objects holding references to your query results. This is a problem for certain batch-style workloads where you know you're only interested in processing a certain entity once. Fixes #752
1 parent 982ee5f commit ec266d3

File tree

4 files changed

+185
-4
lines changed

4 files changed

+185
-4
lines changed

google/cloud/ndb/_datastore_query.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ def _next_batch(self):
375375
self._start_cursor = query.start_cursor
376376
self._index = 0
377377
self._batch = [
378-
_Result(result_type, result_pb, query.order_by)
378+
_Result(result_type, result_pb, query.order_by, query_options=query)
379379
for result_pb in response.batch.entity_results
380380
]
381381

@@ -755,17 +755,21 @@ class _Result(object):
755755
order_by (Optional[Sequence[query.PropertyOrder]]): Ordering for the
756756
query. Used to merge sorted result sets while maintaining sort
757757
order.
758+
query_options (Optional[QueryOptions]): Other query_options.
759+
use_cache is the only supported option.
758760
"""
759761

760762
_key = None
761763

762-
def __init__(self, result_type, result_pb, order_by=None):
764+
def __init__(self, result_type, result_pb, order_by=None, query_options=None):
763765
self.result_type = result_type
764766
self.result_pb = result_pb
765767
self.order_by = order_by
766768

767769
self.cursor = Cursor(result_pb.cursor)
768770

771+
self._query_options = query_options
772+
769773
def __lt__(self, other):
770774
"""For total ordering."""
771775
return self._compare(other) == -1
@@ -854,7 +858,7 @@ def check_cache(self, context):
854858
will cause `None` to be recorded in the cache.
855859
"""
856860
key = self.key()
857-
if context._use_cache(key):
861+
if context._use_cache(key, self._query_options):
858862
try:
859863
return context.cache.get_and_validate(key)
860864
except KeyError:
@@ -880,7 +884,7 @@ def entity(self):
880884
if entity is _KEY_NOT_IN_CACHE:
881885
# entity not in cache, create one, and then add it to cache
882886
entity = model._entity_from_protobuf(self.result_pb.entity)
883-
if context._use_cache(entity.key):
887+
if context._use_cache(entity.key, self._query_options):
884888
context.cache[entity.key] = entity
885889
return entity
886890

tests/system/test_crud.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,36 @@ class SomeKind(ndb.Model):
601601
assert not cache_value
602602

603603

604+
def test_insert_entity_with_use_global_cache_false(dispose_of, client_context):
605+
class SomeKind(ndb.Model):
606+
foo = ndb.IntegerProperty()
607+
bar = ndb.StringProperty()
608+
609+
global_cache = global_cache_module._InProcessGlobalCache()
610+
with client_context.new(global_cache=global_cache).use() as context:
611+
context.set_global_cache_policy(None) # Use default
612+
613+
entity = SomeKind(foo=42, bar="none")
614+
key = entity.put(use_global_cache=False)
615+
dispose_of(key._key)
616+
cache_key = _cache.global_cache_key(key._key)
617+
cache_value = global_cache.get([cache_key])[0]
618+
assert not cache_value
619+
620+
retrieved = key.get(use_global_cache=False)
621+
assert retrieved.foo == 42
622+
assert retrieved.bar == "none"
623+
624+
cache_value = global_cache.get([cache_key])[0]
625+
assert not cache_value
626+
627+
entity.foo = 43
628+
entity.put(use_global_cache=False)
629+
630+
cache_value = global_cache.get([cache_key])[0]
631+
assert not cache_value
632+
633+
604634
@pytest.mark.skipif(not USE_REDIS_CACHE, reason="Redis is not configured")
605635
def test_insert_entity_with_redis_cache(dispose_of, redis_context):
606636
class SomeKind(ndb.Model):
@@ -1873,3 +1903,42 @@ class SomeKind(ndb.Model):
18731903
dispose_of(key._key)
18741904

18751905
assert key.get().sub_model.data["test"] == 1
1906+
1907+
1908+
def test_put_updates_cache(client_context, dispose_of):
1909+
class SomeKind(ndb.Model):
1910+
foo = ndb.IntegerProperty()
1911+
1912+
client_context.set_cache_policy(None) # Use default
1913+
1914+
entity = SomeKind(foo=42)
1915+
key = entity.put()
1916+
assert len(client_context.cache) == 1
1917+
dispose_of(key._key)
1918+
1919+
1920+
def test_put_with_use_cache_true_updates_cache(client_context, dispose_of):
1921+
class SomeKind(ndb.Model):
1922+
foo = ndb.IntegerProperty()
1923+
1924+
client_context.set_cache_policy(None) # Use default
1925+
1926+
entity = SomeKind(foo=42)
1927+
key = entity.put(use_cache=True)
1928+
assert len(client_context.cache) == 1
1929+
assert client_context.cache[key] is entity
1930+
1931+
dispose_of(key._key)
1932+
1933+
1934+
def test_put_with_use_cache_false_does_not_update_cache(client_context, dispose_of):
1935+
class SomeKind(ndb.Model):
1936+
foo = ndb.IntegerProperty()
1937+
1938+
client_context.set_cache_policy(None) # Use default
1939+
1940+
entity = SomeKind(foo=42)
1941+
key = entity.put(use_cache=False)
1942+
assert len(client_context.cache) == 0
1943+
1944+
dispose_of(key._key)

tests/system/test_query.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2010,3 +2010,29 @@ class SomeKind(ndb.Model):
20102010

20112011
# If there is a cache hit, we'll get back the same object, not just a copy
20122012
assert key.get() is retrieved
2013+
2014+
2015+
def test_query_with_explicit_use_cache_updates_cache(dispose_of, client_context):
2016+
class SomeKind(ndb.Model):
2017+
foo = ndb.IntegerProperty()
2018+
2019+
entity = SomeKind(foo=42)
2020+
key = entity.put(use_cache=False)
2021+
dispose_of(key._key)
2022+
assert len(client_context.cache) == 0
2023+
2024+
eventually(lambda: SomeKind.query().fetch(use_cache=True), length_equals(1))
2025+
assert len(client_context.cache) == 1
2026+
2027+
2028+
def test_query_with_use_cache_false_does_not_update_cache(dispose_of, client_context):
2029+
class SomeKind(ndb.Model):
2030+
foo = ndb.IntegerProperty()
2031+
2032+
entity = SomeKind(foo=42)
2033+
key = entity.put(use_cache=False)
2034+
dispose_of(key._key)
2035+
assert len(client_context.cache) == 0
2036+
2037+
eventually(lambda: SomeKind.query().fetch(use_cache=False), length_equals(1))
2038+
assert len(client_context.cache) == 0

tests/unit/test__datastore_query.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1500,6 +1500,31 @@ def probably_has_next(self):
15001500

15011501

15021502
class Test_Result:
1503+
@staticmethod
1504+
def test_constructor_defaults():
1505+
result = _datastore_query._Result(
1506+
result_type=None,
1507+
result_pb=query_pb2.EntityResult(),
1508+
)
1509+
assert result.order_by is None
1510+
assert result._query_options is None
1511+
1512+
@staticmethod
1513+
def test_constructor_order_by():
1514+
order = query_module.PropertyOrder("foo")
1515+
result = _datastore_query._Result(
1516+
result_type=None, result_pb=query_pb2.EntityResult(), order_by=[order]
1517+
)
1518+
assert result.order_by == [order]
1519+
1520+
@staticmethod
1521+
def test_constructor_query_options():
1522+
options = query_module.QueryOptions(use_cache=False)
1523+
result = _datastore_query._Result(
1524+
result_type=None, result_pb=query_pb2.EntityResult(), query_options=options
1525+
)
1526+
assert result._query_options == options
1527+
15031528
@staticmethod
15041529
def test_total_ordering():
15051530
def result(foo, bar=0, baz=""):
@@ -1660,9 +1685,15 @@ def test_entity_full_entity(model):
16601685
mock.Mock(entity=entity_pb, cursor=b"123", spec=("entity", "cursor")),
16611686
)
16621687

1688+
context = context_module.get_context()
1689+
1690+
assert len(context.cache) == 0
16631691
assert result.entity() is entity
16641692
model._entity_from_protobuf.assert_called_once_with(entity_pb)
16651693

1694+
# Regression test for #752: ensure cache is updated after querying
1695+
assert len(context.cache) == 1
1696+
16661697
@staticmethod
16671698
@pytest.mark.usefixtures("in_context")
16681699
@mock.patch("google.cloud.ndb._datastore_query.model")
@@ -1703,6 +1734,57 @@ def test_entity_full_entity_no_cache(model):
17031734
)
17041735
assert result.entity() is entity
17051736

1737+
# Regression test for #752: ensure cache does not grow (i.e. use up memory)
1738+
assert len(context.cache) == 0
1739+
1740+
@staticmethod
1741+
@pytest.mark.usefixtures("in_context")
1742+
@mock.patch("google.cloud.ndb._datastore_query.model")
1743+
def test_entity_full_entity_no_cache_via_cache_options(model):
1744+
context = context_module.get_context()
1745+
with context.new().use():
1746+
key_pb = entity_pb2.Key(
1747+
partition_id=entity_pb2.PartitionId(project_id="testing"),
1748+
path=[entity_pb2.Key.PathElement(kind="ThisKind", id=42)],
1749+
)
1750+
entity = mock.Mock(key=key_pb)
1751+
model._entity_from_protobuf.return_value = entity
1752+
result = _datastore_query._Result(
1753+
_datastore_query.RESULT_TYPE_FULL,
1754+
mock.Mock(entity=entity, cursor=b"123", spec=("entity", "cursor")),
1755+
query_options=query_module.QueryOptions(use_cache=False),
1756+
)
1757+
assert result.entity() is entity
1758+
1759+
# Regression test for #752: ensure cache does not grow (i.e. use up memory)
1760+
assert len(context.cache) == 0
1761+
1762+
@staticmethod
1763+
@pytest.mark.usefixtures("in_context")
1764+
@mock.patch("google.cloud.ndb._datastore_query.model")
1765+
def test_entity_full_entity_cache_options_true(model):
1766+
key_pb = entity_pb2.Key(
1767+
partition_id=entity_pb2.PartitionId(project_id="testing"),
1768+
path=[entity_pb2.Key.PathElement(kind="ThisKind", id=42)],
1769+
)
1770+
entity_pb = mock.Mock(key=key_pb)
1771+
entity = mock.Mock(key=key_module.Key("ThisKind", 42))
1772+
model._entity_from_protobuf.return_value = entity
1773+
result = _datastore_query._Result(
1774+
_datastore_query.RESULT_TYPE_FULL,
1775+
mock.Mock(entity=entity_pb, cursor=b"123", spec=("entity", "cursor")),
1776+
query_options=query_module.QueryOptions(use_cache=True),
1777+
)
1778+
1779+
context = context_module.get_context()
1780+
1781+
assert len(context.cache) == 0
1782+
assert result.entity() is entity
1783+
model._entity_from_protobuf.assert_called_once_with(entity_pb)
1784+
1785+
# Regression test for #752: ensure cache is updated after querying
1786+
assert len(context.cache) == 1
1787+
17061788
@staticmethod
17071789
@pytest.mark.usefixtures("in_context")
17081790
def test_entity_key_only():

0 commit comments

Comments
 (0)