Skip to content

Commit 440a0a4

Browse files
authored
feat: add blob.open() for file-like I/O (#385)
Fixes #29
1 parent 1a2734b commit 440a0a4

File tree

5 files changed

+1228
-1
lines changed

5 files changed

+1228
-1
lines changed

google/cloud/storage/blob.py

Lines changed: 126 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
import copy
3131
import hashlib
3232
from io import BytesIO
33+
from io import TextIOWrapper
3334
import logging
3435
import mimetypes
3536
import os
@@ -78,6 +79,8 @@
7879
from google.cloud.storage.retry import DEFAULT_RETRY
7980
from google.cloud.storage.retry import DEFAULT_RETRY_IF_ETAG_IN_JSON
8081
from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED
82+
from google.cloud.storage.fileio import BlobReader
83+
from google.cloud.storage.fileio import BlobWriter
8184

8285

8386
_API_ACCESS_ENDPOINT = "https://storage.googleapis.com"
@@ -144,7 +147,9 @@ class Blob(_PropertyMixin):
144147
:type chunk_size: int
145148
:param chunk_size:
146149
(Optional) The size of a chunk of data whenever iterating (in bytes).
147-
This must be a multiple of 256 KB per the API specification.
150+
This must be a multiple of 256 KB per the API specification. If not
151+
specified, the chunk_size of the blob itself is used. If that is not
152+
specified, a default value of 40 MB is used.
148153
149154
:type encryption_key: bytes
150155
:param encryption_key:
@@ -3407,6 +3412,126 @@ def update_storage_class(
34073412
retry=retry,
34083413
)
34093414

3415+
def open(
    self,
    mode="r",
    chunk_size=None,
    encoding=None,
    errors=None,
    newline=None,
    **kwargs
):
    r"""Create a file handler for file-like I/O to or from this blob.

    This method can be used as a context manager, just like Python's
    built-in 'open()' function.

    While reading, as with other read methods, if blob.generation is not set
    the most recent blob generation will be used. Because the file-like IO
    reader downloads progressively in chunks, this could result in data from
    multiple versions being mixed together. If this is a concern, use
    either bucket.get_blob(), or blob.reload(), which will download the
    latest generation number and set it; or, if the generation is known, set
    it manually, for instance with bucket.blob(generation=123456).

    :type mode: str
    :param mode:
        (Optional) A mode string, as per standard Python `open()` semantics.
        The first character must be 'r', to open the blob for reading, or
        'w' to open it for writing. The second character, if present, must
        be 't' for (unicode) text mode, or 'b' for bytes mode. If the second
        character is omitted, text mode is the default.

    :type chunk_size: int
    :param chunk_size:
        (Optional) For reads, the minimum number of bytes to read at a time.
        If fewer bytes than the chunk_size are requested, the remainder is
        buffered. For writes, the maximum number of bytes to buffer before
        sending data to the server, and the size of each request when data
        is sent. Writes are implemented as a "resumable upload", so
        chunk_size for writes must be exactly a multiple of 256KiB as with
        other resumable uploads. The default is 40 MiB.

    :type encoding: str
    :param encoding:
        (Optional) For text mode only, the name of the encoding that the
        stream will be decoded or encoded with. If omitted, it defaults to
        locale.getpreferredencoding(False).

    :type errors: str
    :param errors:
        (Optional) For text mode only, an optional string that specifies how
        encoding and decoding errors are to be handled. Pass 'strict' to
        raise a ValueError exception if there is an encoding error (the
        default of None has the same effect), or pass 'ignore' to ignore
        errors. (Note that ignoring encoding errors can lead to data loss.)
        Other more rarely-used options are also available; see the Python
        'io' module documentation for 'io.TextIOWrapper' for a complete list.

    :type newline: str
    :param newline:
        (Optional) For text mode only, controls how line endings are
        handled. It can be None, '', '\n', '\r', and '\r\n'. If None, reads
        use "universal newline mode" and writes use the system default. See
        the Python 'io' module documentation for 'io.TextIOWrapper' for
        details.

    :param kwargs: Keyword arguments to pass to the underlying API calls.
        For both uploads and downloads, the following arguments are
        supported: "if_generation_match", "if_generation_not_match",
        "if_metageneration_match", "if_metageneration_not_match", "timeout".
        For uploads only, the following additional arguments are supported:
        "content_type", "num_retries", "predefined_acl", "checksum".

    :returns: A 'BlobReader' or 'BlobWriter' from
        'google.cloud.storage.fileio', or an 'io.TextIOWrapper' around one
        of those classes, depending on the 'mode' argument.

    :raises: :class:`ValueError` if ``encoding``, ``errors`` or ``newline``
        is given (i.e. is not None) together with a bytes mode ('rb' or
        'wb'); :class:`NotImplementedError` if ``mode`` is not one of 'r',
        'rb', 'rt', 'w', 'wb' or 'wt'.

    Example:
        Read from a text blob by using open() as context manager.

        Using bucket.get_blob() fetches metadata such as the generation,
        which prevents race conditions in case the blob is modified.

        >>> from google.cloud import storage
        >>> client = storage.Client()
        >>> bucket = client.bucket("bucket-name")

        >>> blob = bucket.get_blob("blob-name.txt")
        >>> with blob.open("rt") as f:
        ...     print(f.read())

    """
    if mode in ("rb", "wb"):
        # Compare against None instead of relying on truthiness: values such
        # as newline="" are meaningful text-mode settings but are falsy, so a
        # truthiness test would silently drop them in bytes mode. The
        # built-in open() likewise rejects any explicit text argument here.
        if encoding is not None or errors is not None or newline is not None:
            raise ValueError(
                "encoding, errors and newline arguments are for text mode only"
            )
        if mode == "rb":
            return BlobReader(self, chunk_size=chunk_size, **kwargs)
        return BlobWriter(self, chunk_size=chunk_size, **kwargs)

    if mode in ("r", "rt"):
        raw_stream = BlobReader(self, chunk_size=chunk_size, **kwargs)
    elif mode in ("w", "wt"):
        # The writer is told it is being wrapped for text I/O.
        raw_stream = BlobWriter(
            self, chunk_size=chunk_size, text_mode=True, **kwargs
        )
    else:
        raise NotImplementedError(
            "Supported modes strings are 'r', 'rb', 'rt', 'w', 'wb', and 'wt' only."
        )

    # Text modes: layer decoding/encoding and newline handling on top of the
    # raw bytes stream.
    return TextIOWrapper(
        raw_stream, encoding=encoding, errors=errors, newline=newline
    )
3534+
34103535
cache_control = _scalar_property("cacheControl")
34113536
"""HTTP 'Cache-Control' header for this object.
34123537

0 commit comments

Comments
 (0)