Commit de6811f

Dagster registry generation and persist (#25260)
* Define legacy and latest registry at same time
* Fix file persist
* Get persist to work
* Expand generate models to all models
* Add new registry types
* Fix class name
* Get valid registry type definition
* Remove uuid hack
* Fix ids for json schemas
* Resolve issues
* Update legacy assets
* Add typed legacy registry
* Fix icon issue
* Regenerate models
* Update spec mask to use registry type
* Move v1 to v0
* Add json sanitized dict helper
* Fix tests and format
* Ensure we only get latest
* Code review comments
* fix missing spec error
* Move registry code to helper

1 parent 7d75e5e · commit de6811f

45 files changed: +1372 −207 lines
Lines changed: 32 additions & 0 deletions

@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+
+set -e
+
+
+YAML_DIR=metadata_service/models/src
+OUTPUT_DIR=metadata_service/models/generated
+
+# Ensure the yaml directory exists
+if [ ! -d "$YAML_DIR" ]; then
+  echo "The yaml directory does not exist: $YAML_DIR"
+  exit 1
+fi
+
+
+rm -rf "$OUTPUT_DIR"/*.py
+mkdir -p "$OUTPUT_DIR"
+
+echo "# generated by generate-python-classes" > "$OUTPUT_DIR"/__init__.py
+
+for f in "$YAML_DIR"/*.yaml; do
+  filename_wo_ext=$(basename "$f" | cut -d . -f 1)
+  echo "from .$filename_wo_ext import *" >> "$OUTPUT_DIR"/__init__.py
+
+  datamodel-codegen \
+    --input "$YAML_DIR/$filename_wo_ext.yaml" \
+    --output "$OUTPUT_DIR/$filename_wo_ext.py" \
+    --use-title-as-name \
+    --use-double-quotes \
+    --enum-field-as-literal all \
+    --disable-timestamp
+done
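After this script runs, each YAML schema yields one Pydantic module, all re-exported through the package `__init__.py`. A minimal sketch of consuming the regenerated package (not part of the commit; the file path is a placeholder):

import yaml
from pathlib import Path
from metadata_service.models.generated import ConnectorMetadataDefinitionV0

# "metadata.yaml" is an illustrative path; parse_obj raises a
# pydantic.ValidationError if the file does not match the schema.
raw = yaml.safe_load(Path("metadata.yaml").read_text())
metadata = ConnectorMetadataDefinitionV0.parse_obj(raw)
print(metadata.json(exclude_none=True))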

airbyte-ci/connectors/metadata_service/lib/metadata_service/commands.py

Lines changed: 5 additions & 4 deletions

@@ -6,6 +6,7 @@
 import click
 from metadata_service.gcs_upload import upload_metadata_to_gcs
 from metadata_service.validators.metadata_validator import validate_metadata_file
+from metadata_service.constants import METADATA_FILE_NAME
 from pydantic import ValidationError


@@ -17,15 +18,15 @@ def metadata_service():
 @metadata_service.command(help="Validate a given metadata YAML file.")
 @click.argument("file_path", type=click.Path(exists=True, path_type=pathlib.Path))
 def validate(file_path: pathlib.Path):
-    file_path = file_path if not file_path.is_dir() else file_path / "metadata.yaml"
+    file_path = file_path if not file_path.is_dir() else file_path / METADATA_FILE_NAME

     click.echo(f"Validating {file_path}...")

     is_valid, error = validate_metadata_file(file_path)
     if is_valid:
-        click.echo(f"{file_path} is a valid ConnectorMetadataDefinitionV1 YAML file.")
+        click.echo(f"{file_path} is a valid ConnectorMetadataDefinitionV0 YAML file.")
     else:
-        click.echo(f"{file_path} is not a valid ConnectorMetadataDefinitionV1 YAML file.")
+        click.echo(f"{file_path} is not a valid ConnectorMetadataDefinitionV0 YAML file.")
         click.echo(str(error))
         exit(1)

@@ -37,7 +38,7 @@ def validate(file_path: pathlib.Path):
     "--service-account-file-path", "-sa", type=click.Path(exists=True, path_type=pathlib.Path), envvar="GOOGLE_APPLICATION_CREDENTIALS"
 )
 def upload(metadata_file_path: pathlib.Path, bucket_name: str, service_account_file_path: pathlib.Path):
-    metadata_file_path = metadata_file_path if not metadata_file_path.is_dir() else metadata_file_path / "metadata.yaml"
+    metadata_file_path = metadata_file_path if not metadata_file_path.is_dir() else metadata_file_path / METADATA_FILE_NAME
     try:
         uploaded, blob_id = upload_metadata_to_gcs(bucket_name, metadata_file_path, service_account_file_path)
     except (ValidationError, FileNotFoundError) as e:
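A quick way to exercise the updated `validate` command is click's built-in test runner. A sketch, not from the commit; the connector directory is a placeholder:

from click.testing import CliRunner
from metadata_service.commands import validate

runner = CliRunner()
# A directory argument makes the command append METADATA_FILE_NAME to it.
result = runner.invoke(validate, ["path/to/connector"])  # placeholder path
print(result.output)  # "... is a valid ConnectorMetadataDefinitionV0 YAML file." on success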

airbyte-ci/connectors/metadata_service/lib/metadata_service/gcs_upload.py

Lines changed: 2 additions & 2 deletions

@@ -7,7 +7,7 @@
 import yaml
 from google.cloud import storage
 from google.oauth2 import service_account
-from metadata_service.models.generated.ConnectorMetadataDefinitionV1 import ConnectorMetadataDefinitionV1
+from metadata_service.models.generated.ConnectorMetadataDefinitionV0 import ConnectorMetadataDefinitionV0
 from metadata_service.constants import METADATA_FILE_NAME, METADATA_FOLDER


@@ -38,7 +38,7 @@ def upload_metadata_to_gcs(bucket_name: str, metadata_file_path: Path, service_a
     """
     uploaded = False
     raw_metadata = yaml.safe_load(metadata_file_path.read_text())
-    metadata = ConnectorMetadataDefinitionV1.parse_obj(raw_metadata)
+    metadata = ConnectorMetadataDefinitionV0.parse_obj(raw_metadata)

     credentials = service_account.Credentials.from_service_account_file(service_account_file_path)
     storage_client = storage.Client(credentials=credentials)
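For reference, a hedged sketch of calling the uploader with the renamed model in place; the bucket name and both paths below are placeholders, not values from the commit:

from pathlib import Path
from metadata_service.gcs_upload import upload_metadata_to_gcs

uploaded, blob_id = upload_metadata_to_gcs(
    bucket_name="example-metadata-bucket",                    # placeholder
    metadata_file_path=Path("metadata.yaml"),                 # placeholder
    service_account_file_path=Path("service-account.json"),   # placeholder
)
print(uploaded, blob_id)  # whether a new blob was written, and its id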
Lines changed: 54 additions & 0 deletions

@@ -0,0 +1,54 @@
+# generated by datamodel-codegen:
+#   filename:  ActorDefinitionResourceRequirements.yaml
+
+from __future__ import annotations
+
+from typing import List, Optional
+
+from pydantic import BaseModel, Extra, Field
+from typing_extensions import Literal
+
+
+class ResourceRequirements(BaseModel):
+    class Config:
+        extra = Extra.forbid
+
+    cpu_request: Optional[str] = None
+    cpu_limit: Optional[str] = None
+    memory_request: Optional[str] = None
+    memory_limit: Optional[str] = None
+
+
+class JobType(BaseModel):
+    __root__: Literal[
+        "get_spec",
+        "check_connection",
+        "discover_schema",
+        "sync",
+        "reset_connection",
+        "connection_updater",
+        "replicate",
+    ] = Field(
+        ...,
+        description="enum that describes the different types of jobs that the platform runs.",
+        title="JobType",
+    )
+
+
+class JobTypeResourceLimit(BaseModel):
+    class Config:
+        extra = Extra.forbid
+
+    jobType: JobType
+    resourceRequirements: ResourceRequirements
+
+
+class ActorDefinitionResourceRequirements(BaseModel):
+    class Config:
+        extra = Extra.forbid
+
+    default: Optional[ResourceRequirements] = Field(
+        None,
+        description="if set, these are the requirements that should be set for ALL jobs run for this actor definition.",
+    )
+    jobSpecific: Optional[List[JobTypeResourceLimit]] = None
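Since these are ordinary Pydantic models, the Literal-based JobType root model accepts plain strings. A sketch of validating illustrative data (values are not from the commit):

from metadata_service.models.generated.ActorDefinitionResourceRequirements import (
    ActorDefinitionResourceRequirements,
)

reqs = ActorDefinitionResourceRequirements.parse_obj(
    {
        "default": {"cpu_request": "0.5", "memory_limit": "1Gi"},  # illustrative values
        "jobSpecific": [
            {"jobType": "sync", "resourceRequirements": {"cpu_limit": "2"}}
        ],
    }
)
print(reqs.jobSpecific[0].jobType.__root__)  # "sync"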
Lines changed: 18 additions & 0 deletions

@@ -0,0 +1,18 @@
+# generated by datamodel-codegen:
+#   filename:  AllowedHosts.yaml
+
+from __future__ import annotations
+
+from typing import List, Optional
+
+from pydantic import BaseModel, Extra, Field
+
+
+class AllowedHosts(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    hosts: Optional[List[str]] = Field(
+        None,
+        description="An array of hosts that this connector can connect to. AllowedHosts not being present for the source or destination means that access to all hosts is allowed. An empty list here means that no network access is granted.",
+    )
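Note the contrast with the models above: AllowedHosts uses Extra.allow, so unrecognized keys are retained instead of rejected. A small sketch with illustrative values:

from metadata_service.models.generated.AllowedHosts import AllowedHosts

hosts = AllowedHosts.parse_obj(
    {"hosts": ["api.example.com"], "comment": "extra keys survive"}  # illustrative
)
print(hosts.hosts)  # ["api.example.com"]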
Lines changed: 23 additions & 35 deletions

@@ -1,34 +1,13 @@
 # generated by datamodel-codegen:
-#   filename:  ConnectorMetadataDefinitionV1.yaml
+#   filename:  ConnectorMetadataDefinitionV0.yaml

 from __future__ import annotations

-from enum import Enum
 from typing import List, Optional
 from uuid import UUID

 from pydantic import AnyUrl, BaseModel, Extra, Field
-
-
-class ConnectorType(Enum):
-    destination = "destination"
-    source = "source"
-
-
-class ConnectorSubtype(Enum):
-    api = "api"
-    database = "database"
-    file = "file"
-    custom = "custom"
-    message_queue = "message_queue"
-    unknown = "unknown"
-
-
-class ReleaseStage(Enum):
-    alpha = "alpha"
-    beta = "beta"
-    generally_available = "generally_available"
-    source = "source"
+from typing_extensions import Literal


 class AllowedHosts(BaseModel):

@@ -79,14 +58,20 @@ class Config:
     memory_limit: Optional[str] = None


-class JobType(Enum):
-    get_spec = "get_spec"
-    check_connection = "check_connection"
-    discover_schema = "discover_schema"
-    sync = "sync"
-    reset_connection = "reset_connection"
-    connection_updater = "connection_updater"
-    replicate = "replicate"
+class JobType(BaseModel):
+    __root__: Literal[
+        "get_spec",
+        "check_connection",
+        "discover_schema",
+        "sync",
+        "reset_connection",
+        "connection_updater",
+        "replicate",
+    ] = Field(
+        ...,
+        description="enum that describes the different types of jobs that the platform runs.",
+        title="JobType",
+    )


 class JobTypeResourceLimit(BaseModel):

@@ -137,25 +122,28 @@ class Config:

 class Data(BaseModel):
     name: str
+    icon: Optional[str] = None
     definitionId: UUID
-    connectorType: ConnectorType
+    connectorType: Literal["destination", "source"]
     dockerRepository: str
     dockerImageTag: str
     supportsDbt: Optional[bool] = None
     supportsNormalization: Optional[bool] = None
     license: str
     supportUrl: AnyUrl
     githubIssueLabel: str
-    connectorSubtype: ConnectorSubtype
-    releaseStage: ReleaseStage
+    connectorSubtype: Literal[
+        "api", "database", "file", "custom", "message_queue", "unknown"
+    ]
+    releaseStage: Literal["alpha", "beta", "generally_available", "source"]
     registries: Optional[Registry] = None
     allowedHosts: Optional[AllowedHosts] = None
     normalizationConfig: Optional[NormalizationDestinationDefinitionConfig] = None
     suggestedStreams: Optional[SuggestedStreams] = None
     resourceRequirements: Optional[ActorDefinitionResourceRequirements] = None


-class ConnectorMetadataDefinitionV1(BaseModel):
+class ConnectorMetadataDefinitionV0(BaseModel):
     class Config:
         extra = Extra.forbid

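The net effect of the Enum-to-Literal move above is that fields such as connectorType now validate and store plain strings rather than Enum members. A standalone sketch of that behavior (not from the commit):

from pydantic import BaseModel, ValidationError
from typing_extensions import Literal


class Example(BaseModel):
    # mirrors the connectorType field in the Data model above
    connectorType: Literal["destination", "source"]


print(Example(connectorType="source").connectorType)  # plain str "source"

try:
    Example(connectorType="platform")  # not in the Literal, so rejected
except ValidationError as err:
    print("rejected:", err.errors()[0]["loc"])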
Lines changed: 131 additions & 0 deletions

@@ -0,0 +1,131 @@
+# generated by datamodel-codegen:
+#   filename:  ConnectorRegistryDestinationDefinition.yaml
+
+from __future__ import annotations
+
+from datetime import date
+from typing import Any, Dict, List, Optional
+from uuid import UUID
+
+from pydantic import BaseModel, Extra, Field
+from typing_extensions import Literal
+
+
+class ReleaseStage(BaseModel):
+    __root__: Literal["alpha", "beta", "generally_available", "custom"] = Field(
+        ...,
+        description="enum that describes a connector's release stage",
+        title="ReleaseStage",
+    )
+
+
+class ResourceRequirements(BaseModel):
+    class Config:
+        extra = Extra.forbid
+
+    cpu_request: Optional[str] = None
+    cpu_limit: Optional[str] = None
+    memory_request: Optional[str] = None
+    memory_limit: Optional[str] = None
+
+
+class JobType(BaseModel):
+    __root__: Literal[
+        "get_spec",
+        "check_connection",
+        "discover_schema",
+        "sync",
+        "reset_connection",
+        "connection_updater",
+        "replicate",
+    ] = Field(
+        ...,
+        description="enum that describes the different types of jobs that the platform runs.",
+        title="JobType",
+    )
+
+
+class NormalizationDestinationDefinitionConfig(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    normalizationRepository: str = Field(
+        ...,
+        description="a field indicating the name of the repository to be used for normalization. If the value of the flag is NULL - normalization is not used.",
+    )
+    normalizationTag: str = Field(
+        ...,
+        description="a field indicating the tag of the docker repository to be used for normalization.",
+    )
+    normalizationIntegrationType: str = Field(
+        ...,
+        description="a field indicating the type of integration dialect to use for normalization.",
+    )
+
+
+class AllowedHosts(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    hosts: Optional[List[str]] = Field(
+        None,
+        description="An array of hosts that this connector can connect to. AllowedHosts not being present for the source or destination means that access to all hosts is allowed. An empty list here means that no network access is granted.",
+    )
+
+
+class JobTypeResourceLimit(BaseModel):
+    class Config:
+        extra = Extra.forbid
+
+    jobType: JobType
+    resourceRequirements: ResourceRequirements
+
+
+class ActorDefinitionResourceRequirements(BaseModel):
+    class Config:
+        extra = Extra.forbid
+
+    default: Optional[ResourceRequirements] = Field(
+        None,
+        description="if set, these are the requirements that should be set for ALL jobs run for this actor definition.",
+    )
+    jobSpecific: Optional[List[JobTypeResourceLimit]] = None
+
+
+class ConnectorRegistryDestinationDefinition(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    destinationDefinitionId: UUID
+    name: str
+    dockerRepository: str
+    dockerImageTag: str
+    documentationUrl: str
+    icon: Optional[str] = None
+    spec: Dict[str, Any]
+    tombstone: Optional[bool] = Field(
+        False,
+        description="if false, the configuration is active. if true, then this configuration is permanently off.",
+    )
+    public: Optional[bool] = Field(
+        False,
+        description="true if this connector definition is available to all workspaces",
+    )
+    custom: Optional[bool] = Field(
+        False, description="whether this is a custom connector definition"
+    )
+    releaseStage: Optional[ReleaseStage] = None
+    releaseDate: Optional[date] = Field(
+        None,
+        description="The date when this connector was first released, in yyyy-mm-dd format.",
+    )
+    resourceRequirements: Optional[ActorDefinitionResourceRequirements] = None
+    protocolVersion: Optional[str] = Field(
+        None, description="the Airbyte Protocol version supported by the connector"
+    )
+    normalizationConfig: Optional[NormalizationDestinationDefinitionConfig] = None
+    supportsDbt: Optional[bool] = Field(
+        None,
+        description="an optional flag indicating whether DBT is used in the normalization. If the flag value is NULL - DBT is not used.",
+    )
+    allowedHosts: Optional[AllowedHosts] = None
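A sketch of instantiating the new registry model with only its required fields; the UUID, names, and URL below are placeholders, not real connector data:

from metadata_service.models.generated.ConnectorRegistryDestinationDefinition import (
    ConnectorRegistryDestinationDefinition,
)

definition = ConnectorRegistryDestinationDefinition.parse_obj(
    {
        "destinationDefinitionId": "00000000-0000-0000-0000-000000000000",  # placeholder
        "name": "Example Destination",                                      # placeholder
        "dockerRepository": "airbyte/destination-example",                  # placeholder
        "dockerImageTag": "0.1.0",                                          # placeholder
        "documentationUrl": "https://docs.airbyte.com/example",             # placeholder
        "spec": {},
    }
)
print(definition.tombstone, definition.public, definition.custom)  # defaults: False False False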
