Skip to content

Commit 9763247

Browse files
committed
Write code to create datacontract CLI schema
1 parent 805ab68 commit 9763247

File tree

4 files changed

+52
-3
lines changed

4 files changed

+52
-3
lines changed

customers.contract.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,25 @@
11
dataset: customers
22
owner: product-team@data-contracts.com
33
description: All active customers of our product.
4+
version: 1
45

56
columns:
67
- name: id
7-
description: Unique ID in ...
8+
description: Unique ID for each customer
89
data_type: VARCHAR
910
checks:
1011
- type: no_missing_values
1112
- type: no_duplicate_values
1213
- name: size
14+
description: The customer's t-shirt size
1315
data_type: VARCHAR
1416
checks:
1517
- type: invalid_count
1618
valid_values: ['S', 'M', 'L']
1719
must_be_less_than: 1
1820
- name: created
21+
description: The timestamp at which the customer object was created
1922
data_type: TIMESTAMP
2023
- name: distance
24+
description: The distance the customer is from our shop
2125
data_type: INTEGER

datacontractcli/.keep

Whitespace-only changes.

lib/data_contract.py

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import yaml
2+
13
def to_atlas_schema(contract):
24
hcl = f"""schema "public" {{}}
35
@@ -16,7 +18,7 @@ def to_atlas_schema(contract):
1618
protobuf_type_map = {
1719
"varchar": "string",
1820
"integer": "int64",
19-
# proto doesn't have a timestamp field, so we'll just
21+
# protobuf doesn't have a timestamp field, so we'll just
2022
# use string for this example
2123
"timestamp": "string",
2224
}
@@ -28,4 +30,41 @@ def to_protobuf(contract):
2830
proto += f"\toptional {type} {column['name']} = {i};\n"
2931

3032
proto += "}"
31-
return proto
33+
return proto
34+
35+
def to_datacontract_cli(contract):
    """Convert a parsed data contract into a datacontract CLI document.

    Args:
        contract: Mapping with the keys ``dataset``, ``owner``,
            ``description``, ``version`` and ``columns``. Each entry of
            ``columns`` is a mapping with ``name`` and ``data_type`` and,
            optionally, ``description``.

    Returns:
        A plain ``dict`` (ready to be serialized, e.g. with ``yaml.dump``)
        containing the specification version, the contract id, an ``info``
        section, a single hard-coded ``workshop`` postgres server and one
        model named after the dataset.

    Raises:
        KeyError: If a required top-level key, or a column's ``name`` or
            ``data_type``, is missing.
    """
    dataset = contract["dataset"]

    fields = {}
    for column in contract["columns"]:
        field = {}
        # A column may legitimately have no description; leave the key out
        # instead of failing the whole export with a KeyError.
        description = column.get("description")
        if description is not None:
            field["description"] = description
        # Contract types are upper-case (e.g. VARCHAR); the output uses the
        # lower-case spelling.
        field["type"] = column["data_type"].lower()
        fields[column["name"]] = field

    return {
        "dataContractSpecification": "0.9.3",
        "id": dataset,
        "info": {
            "title": dataset,
            # The version is emitted as a string even when the contract
            # stores it as a number.
            "version": str(contract["version"]),
            "description": contract["description"],
            "owner": contract["owner"],
        },
        "servers": {
            # Fixed local postgres target; not derived from the contract.
            "workshop": {
                "type": "postgres",
                "host": "localhost",
                "port": 5432,
                "database": "postgres",
                "schema": "public",
            },
        },
        "models": {
            dataset: {
                "fields": fields,
                "description": contract["description"],
            },
        },
    }

main.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,9 @@
7575
writer.write(protobuf_schema)
7676
print(f'Written Protobuf schema of `{contract["dataset"]}` data contract to `{proto_file}`')
7777

78+
datacontract_cli_schema = to_datacontract_cli(contract)
79+
datacontract_cli_file = 'datacontractcli/customers.yaml'
80+
with open(datacontract_cli_file, 'w') as writer:
81+
yaml.dump(datacontract_cli_schema, writer)
82+
print(f'Written datacontract CLI schema of `{contract["dataset"]}` data contract to `{datacontract_cli_file}`')
83+

0 commit comments

Comments
 (0)