Skip to content

Commit 2f8716b

Browse files
authored
Chore: refactor client.download_blob_to_file (googleapis#1052)
* Refactor client.download_blob_to_file * Chore: clean up code * refactor blob and client unit tests * lint reformat * Rename _prep_and_do_download
1 parent 11f6024 commit 2f8716b

File tree

4 files changed

+462
-319
lines changed

4 files changed

+462
-319
lines changed

google/cloud/storage/blob.py

Lines changed: 165 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -904,7 +904,7 @@ def _do_download(
904904
):
905905
"""Perform a download without any error handling.
906906
907-
This is intended to be called by :meth:`download_to_file` so it can
907+
This is intended to be called by :meth:`_prep_and_do_download` so it can
908908
be wrapped with error handling / remapping.
909909
910910
:type transport:
@@ -957,7 +957,7 @@ def _do_download(
957957
958958
This private method does not accept ConditionalRetryPolicy values
959959
because the information necessary to evaluate the policy is instead
960-
evaluated in client.download_blob_to_file().
960+
evaluated in blob._prep_and_do_download().
961961
962962
See the retry.py source code and docstrings in this package
963963
(google.cloud.storage.retry) for information on retry types and how
@@ -1124,11 +1124,10 @@ def download_to_file(
11241124
11251125
:raises: :class:`google.cloud.exceptions.NotFound`
11261126
"""
1127-
client = self._require_client(client)
11281127

1129-
client.download_blob_to_file(
1130-
self,
1131-
file_obj=file_obj,
1128+
self._prep_and_do_download(
1129+
file_obj,
1130+
client=client,
11321131
start=start,
11331132
end=end,
11341133
raw_download=raw_download,
@@ -1250,12 +1249,12 @@ def download_to_filename(
12501249
12511250
:raises: :class:`google.cloud.exceptions.NotFound`
12521251
"""
1253-
client = self._require_client(client)
1252+
12541253
try:
12551254
with open(filename, "wb") as file_obj:
1256-
client.download_blob_to_file(
1257-
self,
1255+
self._prep_and_do_download(
12581256
file_obj,
1257+
client=client,
12591258
start=start,
12601259
end=end,
12611260
raw_download=raw_download,
@@ -1382,11 +1381,12 @@ def download_as_bytes(
13821381
13831382
:raises: :class:`google.cloud.exceptions.NotFound`
13841383
"""
1385-
client = self._require_client(client)
1384+
13861385
string_buffer = BytesIO()
1387-
client.download_blob_to_file(
1388-
self,
1386+
1387+
self._prep_and_do_download(
13891388
string_buffer,
1389+
client=client,
13901390
start=start,
13911391
end=end,
13921392
raw_download=raw_download,
@@ -3936,6 +3936,159 @@ def open(
39363936
:rtype: str or ``NoneType``
39373937
"""
39383938

3939+
def _prep_and_do_download(
3940+
self,
3941+
file_obj,
3942+
client=None,
3943+
start=None,
3944+
end=None,
3945+
raw_download=False,
3946+
if_etag_match=None,
3947+
if_etag_not_match=None,
3948+
if_generation_match=None,
3949+
if_generation_not_match=None,
3950+
if_metageneration_match=None,
3951+
if_metageneration_not_match=None,
3952+
timeout=_DEFAULT_TIMEOUT,
3953+
checksum="md5",
3954+
retry=DEFAULT_RETRY,
3955+
):
3956+
3957+
"""Download the contents of a blob object into a file-like object.
3958+
3959+
See https://cloud.google.com/storage/docs/downloading-objects
3960+
3961+
If :attr:`user_project` is set on the bucket, bills the API request
3962+
to that project.
3963+
3964+
:type file_obj: file
3965+
:param file_obj: A file handle to which to write the blob's data.
3966+
3967+
:type client: :class:`~google.cloud.storage.client.Client`
3968+
:param client:
3969+
(Optional) The client to use. If not passed, falls back to the
3970+
``client`` stored on the blob's bucket.
3971+
3972+
:type start: int
3973+
:param start: (Optional) The first byte in a range to be downloaded.
3974+
3975+
:type end: int
3976+
:param end: (Optional) The last byte in a range to be downloaded.
3977+
3978+
:type raw_download: bool
3979+
:param raw_download:
3980+
(Optional) If true, download the object without any expansion.
3981+
3982+
:type if_etag_match: Union[str, Set[str]]
3983+
:param if_etag_match:
3984+
(Optional) See :ref:`using-if-etag-match`
3985+
3986+
:type if_etag_not_match: Union[str, Set[str]]
3987+
:param if_etag_not_match:
3988+
(Optional) See :ref:`using-if-etag-not-match`
3989+
3990+
:type if_generation_match: long
3991+
:param if_generation_match:
3992+
(Optional) See :ref:`using-if-generation-match`
3993+
3994+
:type if_generation_not_match: long
3995+
:param if_generation_not_match:
3996+
(Optional) See :ref:`using-if-generation-not-match`
3997+
3998+
:type if_metageneration_match: long
3999+
:param if_metageneration_match:
4000+
(Optional) See :ref:`using-if-metageneration-match`
4001+
4002+
:type if_metageneration_not_match: long
4003+
:param if_metageneration_not_match:
4004+
(Optional) See :ref:`using-if-metageneration-not-match`
4005+
4006+
:type timeout: float or tuple
4007+
:param timeout:
4008+
(Optional) The amount of time, in seconds, to wait
4009+
for the server response. See: :ref:`configuring_timeouts`
4010+
4011+
:type checksum: str
4012+
:param checksum:
4013+
(Optional) The type of checksum to compute to verify the integrity
4014+
of the object. The response headers must contain a checksum of the
4015+
requested type. If the headers lack an appropriate checksum (for
4016+
instance in the case of transcoded or ranged downloads where the
4017+
remote service does not know the correct checksum, including
4018+
downloads where chunk_size is set) an INFO-level log will be
4019+
emitted. Supported values are "md5", "crc32c" and None. The default
4020+
is "md5".
4021+
4022+
:type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
4023+
:param retry: (Optional) How to retry the RPC. A None value will disable
4024+
retries. A google.api_core.retry.Retry value will enable retries,
4025+
and the object will define retriable response codes and errors and
4026+
configure backoff and timeout options.
4027+
4028+
A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
4029+
Retry object and activates it only if certain conditions are met.
4030+
This class exists to provide safe defaults for RPC calls that are
4031+
not technically safe to retry normally (due to potential data
4032+
duplication or other side-effects) but become safe to retry if a
4033+
condition such as if_metageneration_match is set.
4034+
4035+
See the retry.py source code and docstrings in this package
4036+
(google.cloud.storage.retry) for information on retry types and how
4037+
to configure them.
4038+
4039+
Media operations (downloads and uploads) do not support non-default
4040+
predicates in a Retry object. The default will always be used. Other
4041+
configuration changes for Retry objects such as delays and deadlines
4042+
are respected.
4043+
"""
4044+
# Handle ConditionalRetryPolicy.
4045+
if isinstance(retry, ConditionalRetryPolicy):
4046+
# Conditional retries are designed for non-media calls, which change
4047+
# arguments into query_params dictionaries. Media operations work
4048+
# differently, so here we make a "fake" query_params to feed to the
4049+
# ConditionalRetryPolicy.
4050+
query_params = {
4051+
"ifGenerationMatch": if_generation_match,
4052+
"ifMetagenerationMatch": if_metageneration_match,
4053+
}
4054+
retry = retry.get_retry_policy_if_conditions_met(query_params=query_params)
4055+
4056+
client = self._require_client(client)
4057+
4058+
download_url = self._get_download_url(
4059+
client,
4060+
if_generation_match=if_generation_match,
4061+
if_generation_not_match=if_generation_not_match,
4062+
if_metageneration_match=if_metageneration_match,
4063+
if_metageneration_not_match=if_metageneration_not_match,
4064+
)
4065+
headers = _get_encryption_headers(self._encryption_key)
4066+
headers["accept-encoding"] = "gzip"
4067+
_add_etag_match_headers(
4068+
headers,
4069+
if_etag_match=if_etag_match,
4070+
if_etag_not_match=if_etag_not_match,
4071+
)
4072+
headers = {**_get_default_headers(client._connection.user_agent), **headers}
4073+
4074+
transport = client._http
4075+
4076+
try:
4077+
self._do_download(
4078+
transport,
4079+
file_obj,
4080+
download_url,
4081+
headers,
4082+
start,
4083+
end,
4084+
raw_download,
4085+
timeout=timeout,
4086+
checksum=checksum,
4087+
retry=retry,
4088+
)
4089+
except resumable_media.InvalidResponse as exc:
4090+
_raise_from_invalid_response(exc)
4091+
39394092
@property
39404093
def component_count(self):
39414094
"""Number of underlying components that make up this object.

google/cloud/storage/client.py

Lines changed: 15 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -25,18 +25,16 @@
2525

2626
from google.auth.credentials import AnonymousCredentials
2727

28-
from google import resumable_media
29-
3028
from google.api_core import page_iterator
3129
from google.cloud._helpers import _LocalStack, _NOW
3230
from google.cloud.client import ClientWithProject
3331
from google.cloud.exceptions import NotFound
34-
from google.cloud.storage._helpers import _get_default_headers
32+
3533
from google.cloud.storage._helpers import _get_environ_project
3634
from google.cloud.storage._helpers import _get_storage_host
3735
from google.cloud.storage._helpers import _DEFAULT_STORAGE_HOST
3836
from google.cloud.storage._helpers import _bucket_bound_hostname_url
39-
from google.cloud.storage._helpers import _add_etag_match_headers
37+
4038
from google.cloud.storage._http import Connection
4139
from google.cloud.storage._signing import (
4240
get_expiration_seconds_v4,
@@ -46,17 +44,12 @@
4644
)
4745
from google.cloud.storage.batch import Batch
4846
from google.cloud.storage.bucket import Bucket, _item_to_blob, _blobs_page_start
49-
from google.cloud.storage.blob import (
50-
Blob,
51-
_get_encryption_headers,
52-
_raise_from_invalid_response,
53-
)
47+
from google.cloud.storage.blob import Blob
5448
from google.cloud.storage.hmac_key import HMACKeyMetadata
5549
from google.cloud.storage.acl import BucketACL
5650
from google.cloud.storage.acl import DefaultObjectACL
5751
from google.cloud.storage.constants import _DEFAULT_TIMEOUT
5852
from google.cloud.storage.retry import DEFAULT_RETRY
59-
from google.cloud.storage.retry import ConditionalRetryPolicy
6053

6154

6255
_marker = object()
@@ -1064,52 +1057,25 @@ def download_blob_to_file(
10641057
are respected.
10651058
"""
10661059

1067-
# Handle ConditionalRetryPolicy.
1068-
if isinstance(retry, ConditionalRetryPolicy):
1069-
# Conditional retries are designed for non-media calls, which change
1070-
# arguments into query_params dictionaries. Media operations work
1071-
# differently, so here we make a "fake" query_params to feed to the
1072-
# ConditionalRetryPolicy.
1073-
query_params = {
1074-
"ifGenerationMatch": if_generation_match,
1075-
"ifMetagenerationMatch": if_metageneration_match,
1076-
}
1077-
retry = retry.get_retry_policy_if_conditions_met(query_params=query_params)
1078-
10791060
if not isinstance(blob_or_uri, Blob):
10801061
blob_or_uri = Blob.from_string(blob_or_uri)
1081-
download_url = blob_or_uri._get_download_url(
1082-
self,
1062+
1063+
blob_or_uri._prep_and_do_download(
1064+
file_obj,
1065+
client=self,
1066+
start=start,
1067+
end=end,
1068+
raw_download=raw_download,
1069+
if_etag_match=if_etag_match,
1070+
if_etag_not_match=if_etag_not_match,
10831071
if_generation_match=if_generation_match,
10841072
if_generation_not_match=if_generation_not_match,
10851073
if_metageneration_match=if_metageneration_match,
10861074
if_metageneration_not_match=if_metageneration_not_match,
1075+
timeout=timeout,
1076+
checksum=checksum,
1077+
retry=retry,
10871078
)
1088-
headers = _get_encryption_headers(blob_or_uri._encryption_key)
1089-
headers["accept-encoding"] = "gzip"
1090-
_add_etag_match_headers(
1091-
headers,
1092-
if_etag_match=if_etag_match,
1093-
if_etag_not_match=if_etag_not_match,
1094-
)
1095-
headers = {**_get_default_headers(self._connection.user_agent), **headers}
1096-
1097-
transport = self._http
1098-
try:
1099-
blob_or_uri._do_download(
1100-
transport,
1101-
file_obj,
1102-
download_url,
1103-
headers,
1104-
start,
1105-
end,
1106-
raw_download,
1107-
timeout=timeout,
1108-
checksum=checksum,
1109-
retry=retry,
1110-
)
1111-
except resumable_media.InvalidResponse as exc:
1112-
_raise_from_invalid_response(exc)
11131079

11141080
def list_blobs(
11151081
self,

0 commit comments

Comments
 (0)