Skip to content

Commit 053d08e

Browse files
authored
✨ Source S3: Add handling NoSuchBucket error (#31383)
1 parent 2014cd8 commit 053d08e

File tree

4 files changed

+26
-18
lines changed

4 files changed

+26
-18
lines changed

airbyte-integrations/connectors/source-s3/metadata.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ data:
1010
connectorSubtype: file
1111
connectorType: source
1212
definitionId: 69589781-7828-43c5-9f63-8925b1c1ccc2
13-
dockerImageTag: 4.1.1
13+
dockerImageTag: 4.1.2
1414
dockerRepository: airbyte/source-s3
1515
documentationUrl: https://docs.airbyte.com/integrations/sources/s3
1616
githubIssueLabel: source-s3

airbyte-integrations/connectors/source-s3/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from setuptools import find_packages, setup
77

88
MAIN_REQUIREMENTS = [
9-
"airbyte-cdk>=0.51.35",
9+
"airbyte-cdk>=0.52.0",
1010
"pyarrow==12.0.1",
1111
"smart-open[s3]==5.1.0",
1212
"wcmatch==8.4",

airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,13 @@
1010
import boto3.session
1111
import pytz
1212
import smart_open
13-
from airbyte_cdk.sources.file_based.exceptions import ErrorListingFiles, FileBasedSourceError
13+
from airbyte_cdk.models import FailureType
14+
from airbyte_cdk.sources.file_based.exceptions import CustomFileBasedException, ErrorListingFiles, FileBasedSourceError
1415
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
1516
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
1617
from botocore.client import BaseClient
1718
from botocore.client import Config as ClientConfig
19+
from botocore.exceptions import ClientError
1820
from source_s3.v4.config import Config
1921
from source_s3.v4.zip_reader import DecompressedStream, RemoteFileInsideArchive, ZipContentReader, ZipFileHandler
2022

@@ -68,25 +70,30 @@ def get_matching_files(self, globs: List[str], prefix: Optional[str], logger: lo
6870
total_n_keys = 0
6971

7072
try:
71-
if prefixes:
72-
for prefix in prefixes:
73-
for remote_file in self._page(s3, globs, self.config.bucket, prefix, seen, logger):
74-
total_n_keys += 1
75-
yield remote_file
76-
else:
77-
for remote_file in self._page(s3, globs, self.config.bucket, None, seen, logger):
73+
for current_prefix in prefixes if prefixes else [None]:
74+
for remote_file in self._page(s3, globs, self.config.bucket, current_prefix, seen, logger):
7875
total_n_keys += 1
7976
yield remote_file
8077

8178
logger.info(f"Finished listing objects from S3. Found {total_n_keys} objects total ({len(seen)} unique objects).")
79+
except ClientError as exc:
80+
if exc.response["Error"]["Code"] == "NoSuchBucket":
81+
raise CustomFileBasedException(
82+
f"The bucket {self.config.bucket} does not exist.", failure_type=FailureType.config_error, exception=exc
83+
)
84+
self._raise_error_listing_files(globs, exc)
8285
except Exception as exc:
83-
raise ErrorListingFiles(
84-
FileBasedSourceError.ERROR_LISTING_FILES,
85-
source="s3",
86-
bucket=self.config.bucket,
87-
globs=globs,
88-
endpoint=self.config.endpoint,
89-
) from exc
86+
self._raise_error_listing_files(globs, exc)
87+
88+
def _raise_error_listing_files(self, globs: List[str], exc: Optional[Exception] = None):
89+
"""Helper method to raise the ErrorListingFiles exception."""
90+
raise ErrorListingFiles(
91+
FileBasedSourceError.ERROR_LISTING_FILES,
92+
source="s3",
93+
bucket=self.config.bucket,
94+
globs=globs,
95+
endpoint=self.config.endpoint,
96+
) from exc
9097

9198
def open_file(self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger) -> IOBase:
9299
try:

docs/integrations/sources/s3.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,8 @@ One record will be emitted for each document. Keep in mind that large files can
250250
## Changelog
251251

252252
| Version | Date | Pull Request | Subject |
253-
| :------ | :--------- | :-------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------- |
253+
|:--------|:-----------| :-------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------- |
254+
| 4.1.2 | 2023-10-23 | [31383](https://github.com/airbytehq/airbyte/pull/31383) | Add handling for NoSuchBucket error |
254255
| 4.1.1 | 2023-10-19 | [31601](https://github.com/airbytehq/airbyte/pull/31601) | Base image migration: remove Dockerfile and use the python-connector-base image |
255256
| 4.1.0 | 2023-10-17 | [31340](https://github.com/airbytehq/airbyte/pull/31340) | Add reading files inside zip archive |
256257
| 4.0.5 | 2023-10-16 | [31209](https://github.com/airbytehq/airbyte/pull/31209) | Add experimental Markdown/PDF/Docx file format |

0 commit comments

Comments
 (0)