Commit fd70913

SAT: compatibility tests for catalogs (#15486)
1 parent 1ad5152 commit fd70913

8 files changed: +414 -102 lines changed


airbyte-integrations/bases/source-acceptance-test/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
@@ -1,5 +1,8 @@
 # Changelog
 
+## 0.1.62
+Backward compatibility tests: add syntactic validation of catalogs [#15486](https://github.com/airbytehq/airbyte/pull/15486/)
+
 ## 0.1.61
 Add unit tests coverage computation [#15443](https://github.com/airbytehq/airbyte/pull/15443/).
 
airbyte-integrations/bases/source-acceptance-test/Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ COPY pytest.ini setup.py ./
 COPY source_acceptance_test ./source_acceptance_test
 RUN pip install .
 
-LABEL io.airbyte.version=0.1.61
+LABEL io.airbyte.version=0.1.62
 LABEL io.airbyte.name=airbyte/source-acceptance-test
 
 ENTRYPOINT ["python", "-m", "pytest", "-p", "source_acceptance_test.plugin", "-r", "fEsx"]

airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/config.py

Lines changed: 3 additions & 0 deletions
@@ -57,6 +57,9 @@ class Status(Enum):
 class DiscoveryTestConfig(BaseConfig):
     config_path: str = config_path
     timeout_seconds: int = timeout_seconds
+    backward_compatibility_tests_config: BackwardCompatibilityTestsConfig = Field(
+        description="Configuration for the backward compatibility tests.", default=BackwardCompatibilityTestsConfig()
+    )
 
 
 class ExpectedRecordsConfig(BaseModel):
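
The new `backward_compatibility_tests_config` field is read by the discovery test added below (see `skip_backward_compatibility_tests_fixture` in test_core.py). A minimal, hypothetical Python sketch of setting it programmatically follows; it assumes `BackwardCompatibilityTestsConfig` lives in `source_acceptance_test.config` and exposes the `disable_for_version` field, and connector repositories would set the equivalent keys in their acceptance-test-config.yml.

```python
# Hypothetical example, not part of this commit: building a DiscoveryTestConfig with the
# backward compatibility options set. The field names mirror the Pydantic models above;
# the version value is made up.
from source_acceptance_test.config import BackwardCompatibilityTestsConfig, DiscoveryTestConfig

discovery_inputs = DiscoveryTestConfig(
    config_path="secrets/config.json",
    backward_compatibility_tests_config=BackwardCompatibilityTestsConfig(
        disable_for_version="0.1.0"  # skip the check when the previous image has this version
    ),
)
print(discovery_inputs.backward_compatibility_tests_config.disable_for_version)
```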

airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/conftest.py

Lines changed: 19 additions & 0 deletions
@@ -178,6 +178,12 @@ def cached_schemas_fixture() -> MutableMapping[str, AirbyteStream]:
     return {}
 
 
+@pytest.fixture(name="previous_cached_schemas", scope="session")
+def previous_cached_schemas_fixture() -> MutableMapping[str, AirbyteStream]:
+    """Simple cache for discovered catalog of previous connector: stream_name -> json_schema"""
+    return {}
+
+
 @pytest.fixture(name="discovered_catalog")
 def discovered_catalog_fixture(connector_config, docker_runner: ConnectorRunner, cached_schemas) -> MutableMapping[str, AirbyteStream]:
     """JSON schemas for each stream"""
@@ -190,6 +196,19 @@ def discovered_catalog_fixture(connector_config, docker_runner: ConnectorRunner,
     return cached_schemas
 
 
+@pytest.fixture(name="previous_discovered_catalog")
+def previous_discovered_catalog_fixture(
+    connector_config, previous_connector_docker_runner: ConnectorRunner, previous_cached_schemas
+) -> MutableMapping[str, AirbyteStream]:
+    """JSON schemas for each stream"""
+    if not previous_cached_schemas:
+        output = previous_connector_docker_runner.call_discover(config=connector_config)
+        catalogs = [message.catalog for message in output if message.type == Type.CATALOG]
+        for stream in catalogs[-1].streams:
+            previous_cached_schemas[stream.name] = stream
+    return previous_cached_schemas
+
+
 @pytest.fixture
 def detailed_logger() -> Logger:
     """

airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/tests/test_core.py

Lines changed: 45 additions & 3 deletions
@@ -28,9 +28,9 @@
 from docker.errors import ContainerError
 from jsonschema._utils import flatten
 from source_acceptance_test.base import BaseTest
-from source_acceptance_test.config import BasicReadTestConfig, ConnectionTestConfig, SpecTestConfig
+from source_acceptance_test.config import BasicReadTestConfig, ConnectionTestConfig, DiscoveryTestConfig, SpecTestConfig
 from source_acceptance_test.utils import ConnectorRunner, SecretDict, filter_output, make_hashable, verify_records_schema
-from source_acceptance_test.utils.backward_compatibility import SpecDiffChecker, validate_previous_configs
+from source_acceptance_test.utils.backward_compatibility import CatalogDiffChecker, SpecDiffChecker, validate_previous_configs
 from source_acceptance_test.utils.common import find_all_values_for_key_in_schema, find_keyword_schema
 from source_acceptance_test.utils.json_schema_helper import JsonSchemaHelper, get_expected_schema_structure, get_object_structure
 
@@ -192,7 +192,7 @@ def test_backward_compatibility(
         assert isinstance(actual_connector_spec, ConnectorSpecification) and isinstance(previous_connector_spec, ConnectorSpecification)
         spec_diff = self.compute_spec_diff(actual_connector_spec, previous_connector_spec)
         checker = SpecDiffChecker(spec_diff)
-        checker.assert_spec_is_backward_compatible()
+        checker.assert_is_backward_compatible()
         validate_previous_configs(previous_connector_spec, actual_connector_spec, number_of_configs_to_generate)
 
     def test_additional_properties_is_true(self, actual_connector_spec: ConnectorSpecification):
@@ -235,6 +235,31 @@ def test_check(self, connector_config, inputs: ConnectionTestConfig, docker_runn
 
 @pytest.mark.default_timeout(30)
 class TestDiscovery(BaseTest):
+    @staticmethod
+    def compute_discovered_catalog_diff(
+        discovered_catalog: MutableMapping[str, AirbyteStream], previous_discovered_catalog: MutableMapping[str, AirbyteStream]
+    ):
+        return DeepDiff(
+            {stream_name: airbyte_stream.dict().pop("json_schema") for stream_name, airbyte_stream in previous_discovered_catalog.items()},
+            {stream_name: airbyte_stream.dict().pop("json_schema") for stream_name, airbyte_stream in discovered_catalog.items()},
+            view="tree",
+            ignore_order=True,
+        )
+
+    @pytest.fixture(name="skip_backward_compatibility_tests")
+    def skip_backward_compatibility_tests_fixture(
+        self, inputs: DiscoveryTestConfig, previous_connector_docker_runner: ConnectorRunner
+    ) -> bool:
+        if previous_connector_docker_runner is None:
+            pytest.skip("The previous connector image could not be retrieved.")
+
+        # Get the real connector version in case 'latest' is used in the config:
+        previous_connector_version = previous_connector_docker_runner._image.labels.get("io.airbyte.version")
+
+        if previous_connector_version == inputs.backward_compatibility_tests_config.disable_for_version:
+            pytest.skip(f"Backward compatibility tests are disabled for version {previous_connector_version}.")
+        return False
+
     def test_discover(self, connector_config, docker_runner: ConnectorRunner):
         """Verify that discover produce correct schema."""
         output = docker_runner.call_discover(config=connector_config)
@@ -307,6 +332,23 @@ def test_additional_properties_is_true(self, discovered_catalog: Mapping[str, An
             [additional_properties_value is True for additional_properties_value in additional_properties_values]
         ), "When set, additionalProperties field value must be true for backward compatibility."
 
+    @pytest.mark.default_timeout(60)
+    @pytest.mark.backward_compatibility
+    def test_backward_compatibility(
+        self,
+        skip_backward_compatibility_tests: bool,
+        discovered_catalog: MutableMapping[str, AirbyteStream],
+        previous_discovered_catalog: MutableMapping[str, AirbyteStream],
+    ):
+        """Check if the current discovered catalog is backward compatible with the previous one:
+        perform multiple hardcoded syntactic checks with CatalogDiffChecker.
+        """
+        assert isinstance(discovered_catalog, MutableMapping) and isinstance(previous_discovered_catalog, MutableMapping)
+        catalog_diff = self.compute_discovered_catalog_diff(discovered_catalog, previous_discovered_catalog)
+        checker = CatalogDiffChecker(catalog_diff)
+        checker.assert_is_backward_compatible()
+
 
 def primary_keys_for_records(streams, records):
     streams_with_primary_key = [stream for stream in streams if stream.stream.source_defined_primary_key]
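
To illustrate what `compute_discovered_catalog_diff` produces, here is a standalone sketch that runs the same DeepDiff call on plain dictionaries standing in for the previous and current catalogs; the stream names and schemas are invented.

```python
# Sketch of the DeepDiff comparison used by compute_discovered_catalog_diff, with plain
# dicts (stream name -> json_schema) instead of AirbyteStream objects.
from deepdiff import DeepDiff

previous = {
    "users": {"type": "object", "properties": {"age": {"type": ["integer", "null"]}}},
    "orders": {"type": "object", "properties": {"id": {"type": "integer"}}},
}
current = {
    # "orders" was removed and "age" is no longer nullable.
    "users": {"type": "object", "properties": {"age": {"type": ["integer"]}}},
}

diff = DeepDiff(previous, current, view="tree", ignore_order=True)
# Expect a "dictionary_item_removed" entry for the dropped "orders" stream and an
# "iterable_item_removed" entry for the "null" dropped from the "age" type list.
print(diff.pretty())
```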

airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/backward_compatibility.py

Lines changed: 84 additions & 51 deletions
@@ -2,52 +2,36 @@
 # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
 #
 
+from abc import ABC, abstractmethod
+
 import jsonschema
 from airbyte_cdk.models import ConnectorSpecification
 from deepdiff import DeepDiff
-from hypothesis import given, settings
+from hypothesis import HealthCheck, Verbosity, given, settings
 from hypothesis_jsonschema import from_schema
 from source_acceptance_test.utils import SecretDict
 
 
-class NonBackwardCompatibleSpecError(Exception):
+class NonBackwardCompatibleError(Exception):
     pass
 
 
-class SpecDiffChecker:
-    """A class to perform multiple backward compatible checks on a spec diff"""
-
+class BaseDiffChecker(ABC):
     def __init__(self, diff: DeepDiff) -> None:
         self._diff = diff
 
-    def assert_spec_is_backward_compatible(self):
-        self.check_if_declared_new_required_field()
-        self.check_if_added_a_new_required_property()
-        self.check_if_value_of_type_field_changed()
-        # self.check_if_new_type_was_added() We want to allow type expansion atm
-        self.check_if_type_of_type_field_changed()
-        self.check_if_field_was_made_not_nullable()
-        self.check_if_enum_was_narrowed()
-        self.check_if_declared_new_enum_field()
-
     def _raise_error(self, message: str):
-        raise NonBackwardCompatibleSpecError(f"{message}: {self._diff.pretty()}")
+        raise NonBackwardCompatibleError(f"{self.context} - {message}. Diff: {self._diff.pretty()}")
 
-    def check_if_declared_new_required_field(self):
-        """Check if the new spec declared a 'required' field."""
-        added_required_fields = [
-            addition for addition in self._diff.get("dictionary_item_added", []) if addition.path(output_format="list")[-1] == "required"
-        ]
-        if added_required_fields:
-            self._raise_error("The current spec declared a new 'required' field")
+    @property
+    @abstractmethod
+    def context(self):  # pragma: no cover
+        pass
 
-    def check_if_added_a_new_required_property(self):
-        """Check if the new spec added a property to the 'required' list."""
-        added_required_properties = [
-            addition for addition in self._diff.get("iterable_item_added", []) if addition.up.path(output_format="list")[-1] == "required"
-        ]
-        if added_required_properties:
-            self._raise_error("A new property was added to 'required'")
+    @abstractmethod
+    def assert_is_backward_compatible(self):  # pragma: no cover
+        pass
 
     def check_if_value_of_type_field_changed(self):
         """Check if a type was changed"""
@@ -59,9 +43,9 @@ def check_if_value_of_type_field_changed(self):
             change for change in self._diff.get("values_changed", []) if change.path(output_format="list")[-2] == "type"
         ]
         if type_values_changed or type_values_changed_in_list:
-            self._raise_error("The current spec changed the value of a 'type' field")
+            self._raise_error("The 'type' field value was changed.")
 
-    def check_if_new_type_was_added(self):
+    def check_if_new_type_was_added(self):  # pragma: no cover
         """Detect type value added to type list if new type value is not None (e.g ["str"] -> ["str", "int"])"""
         new_values_in_type_list = [
             change
@@ -70,7 +54,7 @@ def check_if_new_type_was_added(self):
             if change.t2 != "null"
         ]
         if new_values_in_type_list:
-            self._raise_error("The current spec changed the value of a 'type' field")
+            self._raise_error("A new value was added to a 'type' field")
 
     def check_if_type_of_type_field_changed(self):
         """
@@ -90,29 +74,57 @@ def check_if_type_of_type_field_changed(self):
         # This might be something already guaranteed by JSON schema validation.
         if isinstance(change.t1, str):
             if not isinstance(change.t2, list):
-                self._raise_error("The current spec change a type field from string to an invalid value.")
-            if not 0 < len(change.t2) <= 2:
-                self._raise_error(
-                    "The current spec change a type field from string to an invalid value. The type list length should not be empty and have a maximum of two items."
-                )
+                self._raise_error("A 'type' field was changed from string to an invalid value.")
             # If the new type field is a list we want to make sure it only has the original type (t1) and null: e.g. "str" -> ["str", "null"]
             # We want to raise an error otherwise.
             t2_not_null_types = [_type for _type in change.t2 if _type != "null"]
             if not (len(t2_not_null_types) == 1 and t2_not_null_types[0] == change.t1):
-                self._raise_error("The current spec change a type field to a list with multiple invalid values.")
+                self._raise_error("The 'type' field was changed to a list with multiple invalid values")
         if isinstance(change.t1, list):
             if not isinstance(change.t2, str):
-                self._raise_error("The current spec change a type field from list to an invalid value.")
+                self._raise_error("The 'type' field was changed from a list to an invalid value")
             if not (len(change.t1) == 1 and change.t2 == change.t1[0]):
-                self._raise_error("The current spec narrowed a field type.")
+                self._raise_error("An element was removed from the list of 'type'")
+
+
+class SpecDiffChecker(BaseDiffChecker):
+    """A class to perform backward compatibility checks on a connector specification diff"""
+
+    context = "Specification"
+
+    def assert_is_backward_compatible(self):
+        self.check_if_declared_new_required_field()
+        self.check_if_added_a_new_required_property()
+        self.check_if_value_of_type_field_changed()
+        # self.check_if_new_type_was_added() We want to allow type expansion atm
+        self.check_if_type_of_type_field_changed()
+        self.check_if_field_was_made_not_nullable()
+        self.check_if_enum_was_narrowed()
+        self.check_if_declared_new_enum_field()
+
+    def check_if_declared_new_required_field(self):
+        """Check if the new spec declared a 'required' field."""
+        added_required_fields = [
+            addition for addition in self._diff.get("dictionary_item_added", []) if addition.path(output_format="list")[-1] == "required"
+        ]
+        if added_required_fields:
+            self._raise_error("A new 'required' field was declared.")
+
+    def check_if_added_a_new_required_property(self):
+        """Check if the new spec added a property to the 'required' list"""
+        added_required_properties = [
+            addition for addition in self._diff.get("iterable_item_added", []) if addition.up.path(output_format="list")[-1] == "required"
+        ]
+        if added_required_properties:
+            self._raise_error("A new property was added to 'required'")
 
     def check_if_field_was_made_not_nullable(self):
         """Detect when field was made not nullable but is still a list: e.g ["string", "null"] -> ["string"]"""
         removed_nullable = [
             change for change in self._diff.get("iterable_item_removed", []) if change.path(output_format="list")[-2] == "type"
         ]
         if removed_nullable:
-            self._raise_error("The current spec narrowed a field type or made a field not nullable.")
+            self._raise_error("A field type was narrowed or made a field not nullable")
 
     def check_if_enum_was_narrowed(self):
         """Check if the list of values in a enum was shortened in a spec."""
@@ -122,7 +134,7 @@ def check_if_enum_was_narrowed(self):
             if enum_removal.up.path(output_format="list")[-1] == "enum"
         ]
         if enum_removals:
-            self._raise_error("The current spec narrowed an enum field.")
+            self._raise_error("An enum field was narrowed.")
 
     def check_if_declared_new_enum_field(self):
         """Check if an 'enum' field was added to the spec."""
@@ -132,7 +144,7 @@ def check_if_declared_new_enum_field(self):
             if enum_addition.path(output_format="list")[-1] == "enum"
         ]
         if enum_additions:
-            self._raise_error("An 'enum' field was declared on an existing property of the spec.")
+            self._raise_error("An 'enum' field was declared on an existing property")
 
 
 def validate_previous_configs(
@@ -143,13 +155,34 @@ def validate_previous_configs(
     2. Validate a fake previous config against the actual connector specification json schema."""
 
     @given(from_schema(previous_connector_spec.dict()["connectionSpecification"]))
-    @settings(max_examples=number_of_configs_to_generate)
+    @settings(max_examples=number_of_configs_to_generate, verbosity=Verbosity.quiet, suppress_health_check=(HealthCheck.too_slow,))
     def check_fake_previous_config_against_actual_spec(fake_previous_config):
-        fake_previous_config = SecretDict(fake_previous_config)
-        filtered_fake_previous_config = {key: value for key, value in fake_previous_config.data.items() if not key.startswith("_")}
-        try:
-            jsonschema.validate(instance=filtered_fake_previous_config, schema=actual_connector_spec.connectionSpecification)
-        except jsonschema.exceptions.ValidationError as err:
-            raise NonBackwardCompatibleSpecError(err)
+        if isinstance(fake_previous_config, dict):  # hypothesis-jsonschema does not only generate dict objects...
+            fake_previous_config = SecretDict(fake_previous_config)
+            filtered_fake_previous_config = {key: value for key, value in fake_previous_config.data.items() if not key.startswith("_")}
+            try:
+                jsonschema.validate(instance=filtered_fake_previous_config, schema=actual_connector_spec.connectionSpecification)
+            except jsonschema.exceptions.ValidationError as err:
+                raise NonBackwardCompatibleError(err)
 
     check_fake_previous_config_against_actual_spec()
+
+
+class CatalogDiffChecker(BaseDiffChecker):
+    """A class to perform backward compatibility checks on a discovered catalog diff"""
+
+    context = "Catalog"
+
+    def assert_is_backward_compatible(self):
+        self.check_if_stream_was_removed()
+        self.check_if_value_of_type_field_changed()
+        self.check_if_type_of_type_field_changed()
+
+    def check_if_stream_was_removed(self):
+        """Check if a stream was removed from the catalog."""
+        removed_streams = []
+        for removal in self._diff.get("dictionary_item_removed", []):
+            if removal.path() != "root" and removal.up.path() == "root":
+                removed_streams.append(removal.path(output_format="list")[0])
+        if removed_streams:
+            self._raise_error(f"The following streams were removed: {','.join(removed_streams)}")
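
A hypothetical end-to-end use of `CatalogDiffChecker` outside pytest, mirroring the wiring in `TestDiscovery.test_backward_compatibility`; the catalogs are fake `stream_name -> json_schema` mappings and the printed error text is approximate.

```python
# Hypothetical usage sketch: diff two fake catalogs and run the new checker on the result.
from deepdiff import DeepDiff
from source_acceptance_test.utils.backward_compatibility import CatalogDiffChecker, NonBackwardCompatibleError

previous_schemas = {"users": {"type": "object"}, "orders": {"type": "object"}}
current_schemas = {"users": {"type": "object"}}  # the "orders" stream was dropped

diff = DeepDiff(previous_schemas, current_schemas, view="tree", ignore_order=True)
try:
    CatalogDiffChecker(diff).assert_is_backward_compatible()
except NonBackwardCompatibleError as error:
    print(error)  # roughly: "Catalog - The following streams were removed: orders. Diff: ..."
```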
