Skip to content

Commit a6ab528

Browse files
committed
Fix tests and format
1 parent 18b1047 commit a6ab528

File tree

11 files changed

+121
-50
lines changed

11 files changed

+121
-50
lines changed

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
#
44
from dagster import Definitions
55

6-
from orchestrator.jobs.registry import generate_registry, generate_registry_markdown, generate_local_metadata_files
76
from orchestrator.resources.gcp import gcp_gcs_client, gcs_bucket_manager, gcs_directory_blobs, gcs_file_blob, gcs_file_manager
87
from orchestrator.resources.github import github_client, github_connector_repo, github_connectors_directory
98
from orchestrator.resources.local import simple_local_file_manager
@@ -48,6 +47,8 @@
4847
cloud_registry_diff_dataframe,
4948
oss_registry_diff_dataframe,
5049
metadata_directory_report,
50+
oss_registry_diff_report,
51+
cloud_registry_diff_report,
5152
)
5253

5354
from orchestrator.jobs.registry import generate_registry_markdown, generate_local_metadata_files, generate_registry
@@ -64,6 +65,7 @@
6465
cached_specs,
6566
cloud_destinations_dataframe,
6667
cloud_registry_diff_dataframe,
68+
cloud_registry_diff_report,
6769
cloud_registry_diff,
6870
cloud_registry_from_metadata,
6971
cloud_sources_dataframe,
@@ -83,6 +85,7 @@
8385
metadata_directory_report,
8486
oss_destinations_dataframe,
8587
oss_registry_diff_dataframe,
88+
oss_registry_diff_report,
8689
oss_registry_diff,
8790
oss_registry_from_metadata,
8891
oss_sources_dataframe,
@@ -105,12 +108,9 @@
105108
"gcs_bucket_manager": gcs_bucket_manager.configured({"gcs_bucket": {"env": "METADATA_BUCKET"}}),
106109
"registry_directory_manager": gcs_file_manager.configured({"gcs_bucket": {"env": "METADATA_BUCKET"}, "prefix": REGISTRIES_FOLDER}),
107110
"registry_report_directory_manager": gcs_file_manager.configured({"gcs_bucket": {"env": "METADATA_BUCKET"}, "prefix": REPORT_FOLDER}),
108-
"registry_directory_manager": gcs_file_manager.configured({"gcs_bucket": {"env": "METADATA_BUCKET"}, "prefix": REGISTRIES_FOLDER}),
109111
"metadata_file_blobs": gcs_directory_blobs.configured({"prefix": METADATA_FOLDER, "suffix": METADATA_FILE_NAME}),
110-
111112
"legacy_oss_registry_gcs_blob": gcs_file_blob.configured({"prefix": "", "gcs_filename": "oss_catalog.json"}),
112113
"legacy_cloud_registry_gcs_blob": gcs_file_blob.configured({"prefix": "", "gcs_filename": "cloud_catalog.json"}),
113-
114114
"latest_oss_registry_gcs_blob": gcs_file_blob.configured({"prefix": REGISTRIES_FOLDER, "gcs_filename": "oss_registry.json"}),
115115
"latest_cloud_registry_gcs_blob": gcs_file_blob.configured({"prefix": REGISTRIES_FOLDER, "gcs_filename": "cloud_registry.json"}),
116116
}

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/assets/dev.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33
import json
44
from pydash.collections import key_by
55
from deepdiff import DeepDiff
6-
from dagster import Output, asset, OpExecutionContext
6+
from dagster import Output, asset, OpExecutionContext, MetadataValue
77
from typing import List
8+
89
from orchestrator.utils.dagster_helpers import OutputDataFrame, output_dataframe
910
from orchestrator.models.metadata import PartialMetadataDefinition
11+
1012
from metadata_service.models.generated.ConnectorRegistryV0 import ConnectorRegistryV0
1113

1214

@@ -264,9 +266,37 @@ def oss_registry_diff_dataframe(oss_registry_diff: dict) -> OutputDataFrame:
264266

265267

266268
@asset(required_resource_keys={"metadata_file_blobs"}, group_name=GROUP_NAME)
267-
def metadata_directory_report(context):
269+
def metadata_directory_report(context: OpExecutionContext):
268270
metadata_file_blobs = context.resources.metadata_file_blobs
269271
blobs = [blob.name for blob in metadata_file_blobs if blob.name.endswith("metadata.yaml")]
270272
blobs_df = pd.DataFrame(blobs)
271273

272274
return output_dataframe(blobs_df)
275+
276+
277+
@asset(required_resource_keys={"registry_report_directory_manager"}, group_name=GROUP_NAME)
278+
def oss_registry_diff_report(context: OpExecutionContext, oss_registry_diff_dataframe: pd.DataFrame):
279+
markdown = oss_registry_diff_dataframe.to_markdown()
280+
281+
registry_report_directory_manager = context.resources.registry_report_directory_manager
282+
file_handle = registry_report_directory_manager.write_data(markdown.encode(), ext="md", key="dev/oss_registry_diff_report")
283+
284+
metadata = {
285+
"preview": MetadataValue.md(markdown),
286+
"gcs_path": MetadataValue.url(file_handle.gcs_path),
287+
}
288+
return Output(metadata=metadata, value=file_handle)
289+
290+
291+
@asset(required_resource_keys={"registry_report_directory_manager"}, group_name=GROUP_NAME)
292+
def cloud_registry_diff_report(context: OpExecutionContext, cloud_registry_diff_dataframe: pd.DataFrame):
293+
markdown = cloud_registry_diff_dataframe.to_markdown()
294+
295+
registry_report_directory_manager = context.resources.registry_report_directory_manager
296+
file_handle = registry_report_directory_manager.write_data(markdown.encode(), ext="md", key="dev/cloud_registry_diff_report")
297+
298+
metadata = {
299+
"preview": MetadataValue.md(markdown),
300+
"gcs_path": MetadataValue.url(file_handle.gcs_path),
301+
}
302+
return Output(metadata=metadata, value=file_handle)

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/assets/metadata.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,9 @@ def valid_metadata_report_dataframe(overrode_metadata_definitions: List[PartialM
202202
def legacy_registry_derived_metadata_definitions(
203203
legacy_cloud_sources_dataframe, legacy_cloud_destinations_dataframe, legacy_oss_sources_dataframe, legacy_oss_destinations_dataframe
204204
) -> Output[List[PartialMetadataDefinition]]:
205-
sources_metadata_list = merge_into_metadata_definitions("sourceDefinitionId", "source", legacy_oss_sources_dataframe, legacy_cloud_sources_dataframe)
205+
sources_metadata_list = merge_into_metadata_definitions(
206+
"sourceDefinitionId", "source", legacy_oss_sources_dataframe, legacy_cloud_sources_dataframe
207+
)
206208
destinations_metadata_list = merge_into_metadata_definitions(
207209
"destinationDefinitionId", "destination", legacy_oss_destinations_dataframe, legacy_cloud_destinations_dataframe
208210
)

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/assets/registry.py

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
from metadata_service.models.generated.ConnectorRegistryV0 import ConnectorRegistryV0
2020

2121

22-
2322
GROUP_NAME = "registry"
2423

2524
# ERRORS
@@ -112,7 +111,9 @@ def is_metadata_connector_type(metadata_definition: dict, connector_type: str) -
112111
return metadata_definition["data"]["connectorType"] == connector_type
113112

114113

115-
def construct_registry_from_metadata(legacy_registry_derived_metadata_definitions: List[MetadataDefinition], registry_name: str) -> ConnectorRegistryV0:
114+
def construct_registry_from_metadata(
115+
legacy_registry_derived_metadata_definitions: List[MetadataDefinition], registry_name: str
116+
) -> ConnectorRegistryV0:
116117
"""Construct the registry from the metadata definitions.
117118
118119
Args:
@@ -136,6 +137,7 @@ def construct_registry_from_metadata(legacy_registry_derived_metadata_definition
136137

137138
return {"sources": registry_sources, "destinations": registry_destinations}
138139

140+
139141
def construct_registry_with_spec_from_registry(registry: dict, cached_specs: OutputDataFrame) -> dict:
140142
entries = [("source", entry) for entry in registry["sources"]] + [("destinations", entry) for entry in registry["destinations"]]
141143

@@ -157,7 +159,10 @@ def construct_registry_with_spec_from_registry(registry: dict, cached_specs: Out
157159
raise MissingCachedSpecError(f"No cached spec found for {entry['dockerRegistry']:{entry['dockerImageTag']}}")
158160
return registry_with_specs
159161

160-
def persist_registry_to_json(registry: ConnectorRegistryV0, registry_name: str, registry_directory_manager: GCSFileManager) -> GCSFileHandle:
162+
163+
def persist_registry_to_json(
164+
registry: ConnectorRegistryV0, registry_name: str, registry_directory_manager: GCSFileManager
165+
) -> GCSFileHandle:
161166
"""Persist the registry to a json file.
162167
163168
Args:
@@ -173,10 +178,14 @@ def persist_registry_to_json(registry: ConnectorRegistryV0, registry_name: str,
173178
file_handle = registry_directory_manager.write_data(registry_json.encode("utf-8"), ext="json", key=registry_file_name)
174179
return file_handle
175180

181+
176182
# New Registry
177183

184+
178185
@asset(required_resource_keys={"registry_directory_manager"}, group_name=GROUP_NAME)
179-
def cloud_registry_from_metadata(context: OpExecutionContext, metadata_definitions: List[MetadataDefinition], cached_specs: OutputDataFrame) -> Output[ConnectorRegistryV0]:
186+
def cloud_registry_from_metadata(
187+
context: OpExecutionContext, metadata_definitions: List[MetadataDefinition], cached_specs: OutputDataFrame
188+
) -> Output[ConnectorRegistryV0]:
180189
"""
181190
This asset is used to generate the cloud registry from the metadata definitions.
182191
"""
@@ -185,19 +194,21 @@ def cloud_registry_from_metadata(context: OpExecutionContext, metadata_definitio
185194

186195
from_metadata = construct_registry_from_metadata(metadata_definitions, registry_name)
187196
registry_dict = construct_registry_with_spec_from_registry(from_metadata, cached_specs)
188-
reigstry_model = ConnectorRegistryV0.parse_obj(registry_dict)
197+
registry_model = ConnectorRegistryV0.parse_obj(registry_dict)
189198

190-
file_handle = persist_registry_to_json(reigstry_model, registry_name, registry_directory_manager)
199+
file_handle = persist_registry_to_json(registry_model, registry_name, registry_directory_manager)
191200

192201
metadata = {
193202
"gcs_path": MetadataValue.url(file_handle.gcs_path),
194203
}
195204

196-
return Output(metadata=metadata, value=reigstry_model)
205+
return Output(metadata=metadata, value=registry_model)
197206

198207

199208
@asset(required_resource_keys={"registry_directory_manager"}, group_name=GROUP_NAME)
200-
def oss_registry_from_metadata(context: OpExecutionContext, metadata_definitions: List[MetadataDefinition], cached_specs: OutputDataFrame) -> Output[ConnectorRegistryV0]:
209+
def oss_registry_from_metadata(
210+
context: OpExecutionContext, metadata_definitions: List[MetadataDefinition], cached_specs: OutputDataFrame
211+
) -> Output[ConnectorRegistryV0]:
201212
"""
202213
This asset is used to generate the oss registry from the metadata definitions.
203214
"""
@@ -206,15 +217,16 @@ def oss_registry_from_metadata(context: OpExecutionContext, metadata_definitions
206217

207218
from_metadata = construct_registry_from_metadata(metadata_definitions, registry_name)
208219
registry_dict = construct_registry_with_spec_from_registry(from_metadata, cached_specs)
209-
reigstry_model = ConnectorRegistryV0.parse_obj(registry_dict)
220+
registry_model = ConnectorRegistryV0.parse_obj(registry_dict)
210221

211-
file_handle = persist_registry_to_json(reigstry_model, registry_name, registry_directory_manager)
222+
file_handle = persist_registry_to_json(registry_model, registry_name, registry_directory_manager)
212223

213224
metadata = {
214225
"gcs_path": MetadataValue.url(file_handle.gcs_path),
215226
}
216227

217-
return Output(metadata=metadata, value=reigstry_model)
228+
return Output(metadata=metadata, value=registry_model)
229+
218230

219231
@asset(group_name=GROUP_NAME)
220232
def cloud_sources_dataframe(cloud_registry_from_metadata: ConnectorRegistryV0) -> OutputDataFrame:
@@ -243,8 +255,10 @@ def oss_destinations_dataframe(oss_registry_from_metadata: ConnectorRegistryV0)
243255
destinations = oss_registry_from_metadata_dict["destinations"]
244256
return output_dataframe(pd.DataFrame(destinations))
245257

258+
246259
# Old Registry
247260

261+
248262
@asset(group_name=GROUP_NAME)
249263
def legacy_cloud_sources_dataframe(legacy_cloud_registry_dict: dict) -> OutputDataFrame:
250264
sources = legacy_cloud_registry_dict["sources"]
@@ -278,6 +292,7 @@ def legacy_cloud_registry(legacy_cloud_registry_dict: dict) -> ConnectorRegistry
278292
def legacy_oss_registry(legacy_oss_registry_dict: dict) -> ConnectorRegistryV0:
279293
return ConnectorRegistryV0.parse_obj(legacy_oss_registry_dict)
280294

295+
281296
@asset(required_resource_keys={"legacy_cloud_registry_gcs_blob"}, group_name=GROUP_NAME)
282297
def legacy_cloud_registry_dict(context: OpExecutionContext) -> dict:
283298
oss_registry_file = context.resources.legacy_cloud_registry_gcs_blob
@@ -292,4 +307,3 @@ def legacy_oss_registry_dict(context: OpExecutionContext) -> dict:
292307
json_string = oss_registry_file.download_as_string().decode("utf-8")
293308
oss_registry_dict = json.loads(json_string)
294309
return oss_registry_dict
295-

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/jobs/registry.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from dagster import define_asset_job, AssetSelection
22

3-
registries_inclusive = AssetSelection.keys("metadata_directory_report", "cloud_registry_from_metadata", "oss_registry_from_metadata").upstream()
3+
registries_inclusive = AssetSelection.keys(
4+
"metadata_directory_report", "cloud_registry_from_metadata", "oss_registry_from_metadata"
5+
).upstream()
46
registry_reports_inclusive = AssetSelection.keys("connector_registry_location_html", "connector_registry_location_markdown").upstream()
57

68
generate_registry = define_asset_job(name="generate_registry", selection=registries_inclusive)

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/models/metadata.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ def is_valid(self) -> Tuple[bool, Optional[Any]]:
2525
except ValidationError as e:
2626
return (False, e)
2727

28+
2829
class PydanitcDictMixin:
2930
def __getitem__(self, key: str):
3031
return self.__dict__[key]
@@ -36,5 +37,6 @@ def __setitem__(self, key: str, value: Any):
3637
class PartialMetadataDefinition(PydanticDelayValidationMixin, PydanitcDictMixin, ConnectorMetadataDefinitionV0):
3738
pass
3839

40+
3941
class MetadataDefinition(PydanitcDictMixin, ConnectorMetadataDefinitionV0):
4042
pass

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/resources/gcp.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,7 @@ def gcs_bucket_manager(resource_context: InitResourceContext) -> storage.Bucket:
3737

3838
@resource(
3939
required_resource_keys={"gcp_gcs_client"},
40-
config_schema={
41-
"gcs_bucket": StringSource,
42-
"prefix": StringSource
43-
},
40+
config_schema={"gcs_bucket": StringSource, "prefix": StringSource},
4441
)
4542
def gcs_file_manager(resource_context) -> GCSFileManager:
4643
"""FileManager that provides abstract access to GCS.

airbyte-ci/connectors/metadata_service/orchestrator/orchestrator/utils/object_helpers.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def f(*args, **kwargs):
3131

3232
return f
3333

34+
3435
def to_json_sanitized_dict(pydantic_model_obj: BaseModel) -> dict:
3536
"""A helper function to convert a pydantic model to a sanitized dict.
3637

airbyte-ci/connectors/metadata_service/orchestrator/tests/test_debug.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,8 @@
1010
cloud_sources_dataframe,
1111
oss_registry_from_metadata,
1212
cloud_registry_from_metadata,
13-
cloud_registry_from_metadata,
14-
oss_registry_from_metadata,
1513
legacy_oss_registry_dict,
16-
legacy_oss_registry
14+
legacy_oss_registry,
1715
)
1816
from orchestrator.assets.metadata import (
1917
legacy_registry_derived_metadata_definitions,
@@ -64,7 +62,9 @@ def debug_registry_projection():
6462
oss_sources_df = oss_sources_dataframe(oss_registry_dict).value
6563
# github_connector_folders_list = github_connector_folders(context).value
6664

67-
legacy_registry_derived_metadata_definitions(context, cloud_sources_df, cloud_destinations_df, oss_sources_df, oss_destinations_df).value
65+
legacy_registry_derived_metadata_definitions(
66+
context, cloud_sources_df, cloud_destinations_df, oss_sources_df, oss_destinations_df
67+
).value
6868
# valid_metadata_report_dataframe_df = valid_metadata_report_dataframe(metadata_definitions_df).value
6969

7070
# all_sources_df = all_sources_dataframe(cloud_sources_df, oss_sources_df, github_connector_folders_list, valid_metadata_report_dataframe_df)
@@ -73,6 +73,7 @@ def debug_registry_projection():
7373
# connector_registry_location_html(context, all_sources_df, all_destinations_df)
7474
# connector_registry_location_markdown(context, all_sources_df, all_destinations_df)
7575

76+
7677
def debug_registry_generation():
7778
resources = {
7879
"gcp_gcs_client": gcp_gcs_client.configured(
@@ -81,7 +82,7 @@ def debug_registry_generation():
8182
}
8283
),
8384
"gcs_bucket_manager": gcs_bucket_manager.configured({"gcs_bucket": {"env": "METADATA_BUCKET"}}),
84-
"registry_directory_manager": gcs_file_manager.configured({"gcs_bucket": {"env": "METADATA_BUCKET"}, "prefix": REGISTRY_FOLDER}),
85+
"registry_directory_manager": gcs_file_manager.configured({"gcs_bucket": {"env": "METADATA_BUCKET"}, "prefix": REGISTRIES_FOLDER}),
8586
"metadata_file_blobs": gcs_directory_blobs.configured({"prefix": METADATA_FOLDER, "suffix": METADATA_FILE_NAME}),
8687
}
8788

@@ -90,15 +91,16 @@ def debug_registry_generation():
9091
oss_registry_from_metadata(context, metadata_definitions_asset).value
9192
# cloud_registry_from_metadata(context, metadata_definitions_asset).value
9293

93-
def test_debug_registry_diff():
94+
95+
def debug_registry_diff():
9496
resources = {
9597
"gcp_gcs_client": gcp_gcs_client.configured(
9698
{
9799
"gcp_gcs_cred_string": {"env": "GCS_CREDENTIALS"},
98100
}
99101
),
100102
"gcs_bucket_manager": gcs_bucket_manager.configured({"gcs_bucket": {"env": "METADATA_BUCKET"}}),
101-
"registry_directory_manager": gcs_file_manager.configured({"gcs_bucket": {"env": "METADATA_BUCKET"}, "prefix": REGISTRY_FOLDER}),
103+
"registry_directory_manager": gcs_file_manager.configured({"gcs_bucket": {"env": "METADATA_BUCKET"}, "prefix": REGISTRIES_FOLDER}),
102104
"metadata_file_blobs": gcs_directory_blobs.configured({"prefix": METADATA_FOLDER, "suffix": METADATA_FILE_NAME}),
103105
"legacy_oss_registry_gcs_blob": gcs_file_blob.configured({"prefix": "", "gcs_filename": "oss_catalog.json"}),
104106
}

0 commit comments

Comments
 (0)