Merged

Commits (24)
30188ec  source-sftp-bulk: initial changes to be compliant with new protocol (aldogonzalez8, Apr 8, 2025)
e972c14  source-sftp-bulk: update release information (aldogonzalez8, Apr 8, 2025)
496be27  source-sftp-bulk: ruff format (aldogonzalez8, Apr 8, 2025)
28ba5c1  sftp-bulk-transfer: update updated at type (aldogonzalez8, Apr 9, 2025)
a65902e  sftp-bulk-transfer: ruff format (aldogonzalez8, Apr 9, 2025)
9108c4f  source-sftp-bulk: fix unit tests (aldogonzalez8, Apr 9, 2025)
06ed538  source-sftp-bulk: ruff format (aldogonzalez8, Apr 9, 2025)
eeb3b69  source-sftp-bulk: add source uri (aldogonzalez8, Apr 16, 2025)
cecc472  source-sftp-bulk: ruff format (aldogonzalez8, Apr 16, 2025)
03e128d  source-sftp-bulk: fix test (aldogonzalez8, Apr 16, 2025)
ee3f4d1  source-sftp-bulk: poetry lock (aldogonzalez8, Apr 16, 2025)
cc3f100  file-api: bump cdk to latest pre-release (aldogonzalez8, Apr 19, 2025)
8668b55  source-sftp-bulk: poetry lock (aldogonzalez8, Apr 19, 2025)
8fabc94  source-sftp-bulk: poetry lock (aldogonzalez8, Apr 22, 2025)
55ce914  source-sftp-bulk: merge from master (aldogonzalez8, Apr 22, 2025)
ca8f321  source-sftp-bulk: poetry lock (aldogonzalez8, Apr 22, 2025)
338d89d  source-google-drive: bump latest cdk and fix file name field (aldogonzalez8, Apr 23, 2025)
60c104c  source-sftpb: fix unit test after file name field change (aldogonzalez8, Apr 23, 2025)
e09138e  Merge branch 'master' into aldogonzalez8/source-sftp-bulk/adjust-file… (aldogonzalez8, Apr 24, 2025)
9a1e398  sftpb: update release information (aldogonzalez8, Apr 25, 2025)
3469e92  sftpb: update release information (aldogonzalez8, Apr 25, 2025)
b213fa0  Merge branch 'master' into aldogonzalez8/source-sftp-bulk/adjust-file… (aldogonzalez8, Apr 30, 2025)
713d334  correct the destination version (aldogonzalez8, Apr 30, 2025)
90ca737  update release information (aldogonzalez8, May 5, 2025)
@@ -14,6 +14,7 @@

from airbyte_cdk import AirbyteTracedException, ConfiguredAirbyteCatalog
from airbyte_cdk.models import (
+AirbyteRecordMessageFileReference,
FailureType,
Status,
)
@@ -101,27 +102,37 @@ def test_get_files_empty_files(configured_catalog: ConfiguredAirbyteCatalog, con


@pytest.mark.slow
@pytest.mark.limit_memory("10 MB")
@pytest.mark.limit_memory("11 MB")
Review comment (Contributor):

Is there a reason we changed the size here?

Reply (aldogonzalez8, Contributor Author, Apr 25, 2025):

After playing with this locally for a few minutes, I got the same consistent (not increasing) message on the master branch as on the dev branch:

    Test was limited to 10.0MiB but allocated 10.6MiB

So it's more a matter of local memory resources than an issue with the changes, and it's safe to increase the limit to avoid annoying the local developer.
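The limit being discussed is the pytest-memray `limit_memory` marker used just above. A minimal, illustrative sketch of how the budget behaves (this standalone test is not part of the PR, and it assumes pytest-memray is installed and the suite is run with `--memray`):

```python
import pytest


@pytest.mark.limit_memory("11 MB")
def test_stays_under_memory_budget():
    # pytest-memray tracks allocations made while the test body runs and fails
    # the test when they exceed the declared budget, reporting a message like
    # "Test was limited to 11.0MiB but allocated ...".
    payload = bytearray(5 * 1024 * 1024)  # ~5 MiB, comfortably under the cap
    assert len(payload) == 5 * 1024 * 1024
```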

def test_get_file_csv_file_transfer(configured_catalog: ConfiguredAirbyteCatalog, config_fixture_use_file_transfer: Mapping[str, Any]):
source = SourceSFTPBulk(catalog=configured_catalog, config=config_fixture_use_file_transfer, state=None)
output = read(source=source, config=config_fixture_use_file_transfer, catalog=configured_catalog)
-expected_file_data = {
-    "bytes": 46_754_266,
-    "file_relative_path": "files/file_transfer/file_transfer_1.csv",
-    "file_url": "/tmp/airbyte-file-transfer/files/file_transfer/file_transfer_1.csv",
-    "modified": ANY,
-    "source_file_url": "/files/file_transfer/file_transfer_1.csv",
+file_folder = "files/file_transfer"
+file_name = "file_transfer_1.csv"
+source_file_relative_path = f"{file_folder}/{file_name}"
+expected_file_data = AirbyteRecordMessageFileReference(
+    file_size_bytes=46_754_266,
+    source_file_relative_path=source_file_relative_path,
+    staging_file_url=f"/tmp/airbyte-file-transfer/{source_file_relative_path}",
+)
+expected_record_data = {
+    "file_name": file_name,
+    "folder": f"/{file_folder}",
+    "source_uri": f"sftp://{config_fixture_use_file_transfer['username']}@{config_fixture_use_file_transfer['host']}:{config_fixture_use_file_transfer['port']}/{source_file_relative_path}",
+    "updated_at": ANY,
}
assert len(output.records) == 1
-assert list(map(lambda record: record.record.file, output.records)) == [expected_file_data]
+assert list(map(lambda record: record.record.file_reference, output.records)) == [expected_file_data]
+assert list(map(lambda record: record.record.data, output.records)) == [expected_record_data]

# Additional assertion to check if the file exists at the file_url path
-file_path = expected_file_data["file_url"]
+file_path = expected_file_data.staging_file_url
assert os.path.exists(file_path), f"File not found at path: {file_path}"


@pytest.mark.slow
@pytest.mark.limit_memory("10 MB")
@pytest.mark.limit_memory("11 MB")
def test_get_all_file_csv_file_transfer(
configured_catalog: ConfiguredAirbyteCatalog, config_fixture_use_all_files_transfer: Mapping[str, Any]
):
@@ -132,8 +143,8 @@ def test_get_all_file_csv_file_transfer(
source = SourceSFTPBulk(catalog=configured_catalog, config=config_fixture_use_all_files_transfer, state=None)
output = read(source=source, config=config_fixture_use_all_files_transfer, catalog=configured_catalog)
assert len(output.records) == 5
-total_bytes = sum(list(map(lambda record: record.record.file["bytes"], output.records)))
-files_paths = list(map(lambda record: record.record.file["file_url"], output.records))
+total_bytes = sum(list(map(lambda record: record.record.file_reference.file_size_bytes, output.records)))
+files_paths = list(map(lambda record: record.record.file_reference.staging_file_url, output.records))
for file_path in files_paths:
assert os.path.exists(file_path), f"File not found at path: {file_path}"
assert total_bytes == 233_771_330
@@ -150,8 +161,8 @@ def test_default_mirroring_paths_works_for_not_present_config_file_transfer(
source = SourceSFTPBulk(catalog=configured_catalog, config=config_fixture_not_duplicates, state=None)
output = read(source=source, config=config_fixture_not_duplicates, catalog=configured_catalog)
assert len(output.records) == expected_uniqueness_count
-files_paths = set(map(lambda record: record.record.file["file_url"], output.records))
-files_relative_paths = set(map(lambda record: record.record.file["file_relative_path"], output.records))
+files_paths = set(map(lambda record: record.record.file_reference.staging_file_url, output.records))
+files_relative_paths = set(map(lambda record: record.record.file_reference.source_file_relative_path, output.records))
assert len(files_relative_paths) == expected_uniqueness_count
assert len(files_paths) == expected_uniqueness_count
for file_path, files_relative_path in zip(files_paths, files_relative_paths):
@@ -170,8 +181,8 @@ def test_not_mirroring_paths_not_duplicates_file_transfer(
source = SourceSFTPBulk(catalog=configured_catalog, config=config_fixture_not_mirroring_paths_not_duplicates, state=None)
output = read(source=source, config=config_fixture_not_mirroring_paths_not_duplicates, catalog=configured_catalog)
assert len(output.records) == expected_uniqueness_count
-files_paths = set(map(lambda record: record.record.file["file_url"], output.records))
-files_relative_paths = set(map(lambda record: record.record.file["file_relative_path"], output.records))
+files_paths = set(map(lambda record: record.record.file_reference.staging_file_url, output.records))
+files_relative_paths = set(map(lambda record: record.record.file_reference.source_file_relative_path, output.records))
assert len(files_relative_paths) == expected_uniqueness_count
assert len(files_paths) == expected_uniqueness_count
for file_path, files_relative_path in zip(files_paths, files_relative_paths):
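Taken together, the test changes above amount to swapping the dict-style file payload for the typed AirbyteRecordMessageFileReference. A minimal sketch of the old-to-new mapping the assertions now exercise (values are copied from the diff; the side-by-side comparison itself is illustrative):

```python
from unittest.mock import ANY

from airbyte_cdk.models import AirbyteRecordMessageFileReference

# New-style typed reference, with the fields asserted in the updated tests.
file_reference = AirbyteRecordMessageFileReference(
    file_size_bytes=46_754_266,
    source_file_relative_path="files/file_transfer/file_transfer_1.csv",
    staging_file_url="/tmp/airbyte-file-transfer/files/file_transfer/file_transfer_1.csv",
)

# Old-style dict payload the tests previously asserted against (record.record.file).
legacy_file = {
    "bytes": 46_754_266,
    "file_relative_path": "files/file_transfer/file_transfer_1.csv",
    "file_url": "/tmp/airbyte-file-transfer/files/file_transfer/file_transfer_1.csv",
    "modified": ANY,
    "source_file_url": "/files/file_transfer/file_transfer_1.csv",
}

# The assertions migrate from dict lookups to attribute access on record.record.file_reference.
assert file_reference.file_size_bytes == legacy_file["bytes"]
assert file_reference.staging_file_url == legacy_file["file_url"]
assert file_reference.source_file_relative_path == legacy_file["file_relative_path"]
```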
@@ -7,7 +7,7 @@ data:
connectorSubtype: file
connectorType: source
definitionId: 31e3242f-dee7-4cdc-a4b8-8e06c5458517
-dockerImageTag: 1.7.8
+dockerImageTag: 1.8.0
dockerRepository: airbyte/source-sftp-bulk
documentationUrl: https://docs.airbyte.com/integrations/sources/sftp-bulk
githubIssueLabel: source-sftp-bulk