Skip to content

Commit 0a243fa

Browse files
authored
fix: Blob.from_string parse storage uri with regex (#1170)
1 parent d38adb6 commit 0a243fa

File tree

2 files changed: +18 -9 lines changed

google/cloud/storage/blob.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,9 @@
134134
"Blob.download_as_string() is deprecated and will be removed in future. "
135135
"Use Blob.download_as_bytes() instead."
136136
)
137-
137+
_GS_URL_REGEX_PATTERN = re.compile(
138+
r"(?P<scheme>gs)://(?P<bucket_name>[a-z0-9_.-]+)/(?P<object_name>.+)"
139+
)
138140

139141
_DEFAULT_CHUNKSIZE = 104857600 # 1024 * 1024 B * 100 = 100 MB
140142
_MAX_MULTIPART_SIZE = 8388608 # 8 MB
@@ -403,12 +405,11 @@ def from_string(cls, uri, client=None):
403405
"""
404406
from google.cloud.storage.bucket import Bucket
405407

406-
scheme, netloc, path, query, frag = urlsplit(uri)
407-
if scheme != "gs":
408+
match = _GS_URL_REGEX_PATTERN.match(uri)
409+
if not match:
408410
raise ValueError("URI scheme must be gs")
409-
410-
bucket = Bucket(client, name=netloc)
411-
return cls(path[1:], bucket)
411+
bucket = Bucket(client, name=match.group("bucket_name"))
412+
return cls(match.group("object_name"), bucket)
412413

413414
def generate_signed_url(
414415
self,

tests/unit/test_blob.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5819,13 +5819,21 @@ def test_from_string_w_valid_uri(self):
58195819
from google.cloud.storage.blob import Blob
58205820

58215821
client = self._make_client()
5822-
uri = "gs://BUCKET_NAME/b"
5823-
blob = Blob.from_string(uri, client)
5822+
basic_uri = "gs://bucket_name/b"
5823+
blob = Blob.from_string(basic_uri, client)
58245824

58255825
self.assertIsInstance(blob, Blob)
58265826
self.assertIs(blob.client, client)
58275827
self.assertEqual(blob.name, "b")
5828-
self.assertEqual(blob.bucket.name, "BUCKET_NAME")
5828+
self.assertEqual(blob.bucket.name, "bucket_name")
5829+
5830+
nested_uri = "gs://bucket_name/path1/path2/b#name"
5831+
blob = Blob.from_string(nested_uri, client)
5832+
5833+
self.assertIsInstance(blob, Blob)
5834+
self.assertIs(blob.client, client)
5835+
self.assertEqual(blob.name, "path1/path2/b#name")
5836+
self.assertEqual(blob.bucket.name, "bucket_name")
58295837

58305838
def test_from_string_w_invalid_uri(self):
58315839
from google.cloud.storage.blob import Blob

0 commit comments

Comments
 (0)