|
30 | 30 | import copy |
31 | 31 | import hashlib |
32 | 32 | from io import BytesIO |
| 33 | +from io import TextIOWrapper |
33 | 34 | import logging |
34 | 35 | import mimetypes |
35 | 36 | import os |
|
78 | 79 | from google.cloud.storage.retry import DEFAULT_RETRY |
79 | 80 | from google.cloud.storage.retry import DEFAULT_RETRY_IF_ETAG_IN_JSON |
80 | 81 | from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED |
| 82 | +from google.cloud.storage.fileio import BlobReader |
| 83 | +from google.cloud.storage.fileio import BlobWriter |
81 | 84 |
|
82 | 85 |
|
83 | 86 | _API_ACCESS_ENDPOINT = "https://storage.googleapis.com" |
@@ -144,7 +147,9 @@ class Blob(_PropertyMixin): |
144 | 147 | :type chunk_size: int |
145 | 148 | :param chunk_size: |
146 | 149 | (Optional) The size of a chunk of data whenever iterating (in bytes). |
147 | | - This must be a multiple of 256 KB per the API specification. |
| 150 | + This must be a multiple of 256 KB per the API specification. If not |
| 151 | + specified, the chunk_size of the blob itself is used. If that is not |
| 152 | + specified, a default value of 40 MB is used. |
148 | 153 |
|
149 | 154 | :type encryption_key: bytes |
150 | 155 | :param encryption_key: |
@@ -3407,6 +3412,126 @@ def update_storage_class( |
3407 | 3412 | retry=retry, |
3408 | 3413 | ) |
3409 | 3414 |
|
| 3415 | + def open( |
| 3416 | + self, |
| 3417 | + mode="r", |
| 3418 | + chunk_size=None, |
| 3419 | + encoding=None, |
| 3420 | + errors=None, |
| 3421 | + newline=None, |
| 3422 | + **kwargs |
| 3423 | + ): |
| 3424 | + r"""Create a file handler for file-like I/O to or from this blob. |
| 3425 | +
|
| 3426 | + This method can be used as a context manager, just like Python's |
| 3427 | + built-in 'open()' function. |
| 3428 | +
|
| 3429 | + While reading, as with other read methods, if blob.generation is not set |
| 3430 | + the most recent blob generation will be used. Because the file-like IO |
| 3431 | + reader downloads progressively in chunks, this could result in data from |
| 3432 | + multiple versions being mixed together. If this is a concern, use |
| 3433 | + either bucket.get_blob(), or blob.reload(), which will download the |
| 3434 | + latest generation number and set it; or, if the generation is known, set |
| 3435 | + it manually, for instance with bucket.blob(generation=123456). |
| 3436 | +
|
| 3437 | + :type mode: str |
| 3438 | + :param mode: |
| 3439 | +            (Optional) A mode string, as per standard Python `open()` semantics. The first |
| 3440 | + character must be 'r', to open the blob for reading, or 'w' to open |
| 3441 | + it for writing. The second character, if present, must be 't' for |
| 3442 | + (unicode) text mode, or 'b' for bytes mode. If the second character |
| 3443 | + is omitted, text mode is the default. |
| 3444 | +
|
| 3445 | +        :type chunk_size: int |
| 3446 | + :param chunk_size: |
| 3447 | + (Optional) For reads, the minimum number of bytes to read at a time. |
| 3448 | + If fewer bytes than the chunk_size are requested, the remainder is |
| 3449 | + buffered. For writes, the maximum number of bytes to buffer before |
| 3450 | + sending data to the server, and the size of each request when data |
| 3451 | + is sent. Writes are implemented as a "resumable upload", so |
| 3452 | + chunk_size for writes must be exactly a multiple of 256KiB as with |
| 3453 | + other resumable uploads. The default is 40 MiB. |
| 3454 | +
|
| 3455 | + :type encoding: str |
| 3456 | + :param encoding: |
| 3457 | + (Optional) For text mode only, the name of the encoding that the stream will |
| 3458 | + be decoded or encoded with. If omitted, it defaults to |
| 3459 | + locale.getpreferredencoding(False). |
| 3460 | +
|
| 3461 | + :type errors: str |
| 3462 | + :param errors: |
| 3463 | + (Optional) For text mode only, an optional string that specifies how encoding |
| 3464 | + and decoding errors are to be handled. Pass 'strict' to raise a |
| 3465 | + ValueError exception if there is an encoding error (the default of |
| 3466 | + None has the same effect), or pass 'ignore' to ignore errors. (Note |
| 3467 | + that ignoring encoding errors can lead to data loss.) Other more |
| 3468 | + rarely-used options are also available; see the Python 'io' module |
| 3469 | + documentation for 'io.TextIOWrapper' for a complete list. |
| 3470 | +
|
| 3471 | + :type newline: str |
| 3472 | + :param newline: |
| 3473 | + (Optional) For text mode only, controls how line endings are handled. It can |
| 3474 | + be None, '', '\n', '\r', and '\r\n'. If None, reads use "universal |
| 3475 | + newline mode" and writes use the system default. See the Python |
| 3476 | + 'io' module documentation for 'io.TextIOWrapper' for details. |
| 3477 | +
|
| 3478 | + :param kwargs: Keyword arguments to pass to the underlying API calls. |
| 3479 | + For both uploads and downloads, the following arguments are |
| 3480 | + supported: "if_generation_match", "if_generation_not_match", |
| 3481 | + "if_metageneration_match", "if_metageneration_not_match", "timeout". |
| 3482 | + For uploads only, the following additional arguments are supported: |
| 3483 | + "content_type", "num_retries", "predefined_acl", "checksum". |
| 3484 | +
|
| 3485 | + :returns: A 'BlobReader' or 'BlobWriter' from |
| 3486 | + 'google.cloud.storage.fileio', or an 'io.TextIOWrapper' around one |
| 3487 | + of those classes, depending on the 'mode' argument. |
| 3488 | +
|
| 3489 | + Example: |
| 3490 | + Read from a text blob by using open() as context manager. |
| 3491 | +
|
| 3492 | + Using bucket.get_blob() fetches metadata such as the generation, |
| 3493 | + which prevents race conditions in case the blob is modified. |
| 3494 | +
|
| 3495 | + >>> from google.cloud import storage |
| 3496 | + >>> client = storage.Client() |
| 3497 | + >>> bucket = client.bucket("bucket-name") |
| 3498 | +
|
| 3499 | + >>> blob = bucket.get_blob("blob-name.txt") |
| 3500 | + >>> with blob.open("rt") as f: |
| 3501 | + >>> print(f.read()) |
| 3502 | +
|
| 3503 | + """ |
| 3504 | + if mode == "rb": |
| 3505 | + if encoding or errors or newline: |
| 3506 | + raise ValueError( |
| 3507 | + "encoding, errors and newline arguments are for text mode only" |
| 3508 | + ) |
| 3509 | + return BlobReader(self, chunk_size=chunk_size, **kwargs) |
| 3510 | + elif mode == "wb": |
| 3511 | + if encoding or errors or newline: |
| 3512 | + raise ValueError( |
| 3513 | + "encoding, errors and newline arguments are for text mode only" |
| 3514 | + ) |
| 3515 | + return BlobWriter(self, chunk_size=chunk_size, **kwargs) |
| 3516 | + elif mode in ("r", "rt"): |
| 3517 | + return TextIOWrapper( |
| 3518 | + BlobReader(self, chunk_size=chunk_size, **kwargs), |
| 3519 | + encoding=encoding, |
| 3520 | + errors=errors, |
| 3521 | + newline=newline, |
| 3522 | + ) |
| 3523 | + elif mode in ("w", "wt"): |
| 3524 | + return TextIOWrapper( |
| 3525 | + BlobWriter(self, chunk_size=chunk_size, text_mode=True, **kwargs), |
| 3526 | + encoding=encoding, |
| 3527 | + errors=errors, |
| 3528 | + newline=newline, |
| 3529 | + ) |
| 3530 | + else: |
| 3531 | + raise NotImplementedError( |
| 3532 | + "Supported modes strings are 'r', 'rb', 'rt', 'w', 'wb', and 'wt' only." |
| 3533 | + ) |
| 3534 | + |
3410 | 3535 | cache_control = _scalar_property("cacheControl") |
3411 | 3536 | """HTTP 'Cache-Control' header for this object. |
3412 | 3537 |
|
|
0 commit comments