Skip to content

Commit a7e578e

Browse files
Update openapi spec and written form to comply with latest delta-sharing protocol (#203)
1 parent 51ef061 commit a7e578e

36 files changed

+1803
-297
lines changed

docsite/docs/protocols/delta-sharing-protocol.md

Lines changed: 263 additions & 15 deletions
Large diffs are not rendered by default.

protocol/delta-sharing-protocol-api.yml

Lines changed: 14 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,12 @@ paths:
521521
description: 'If set to true, return the historical metadata if seen in the delta log. This is for the streaming client to check if the table schema is still read compatible.'
522522
schema:
523523
type: boolean
524+
- in: header
525+
name: delta-sharing-capabilities
526+
required: false
527+
description: 'Delta Sharing Capabilities'
528+
schema:
529+
type: string
524530
responses:
525531
'400':
526532
$ref: "#/components/responses/400"
@@ -575,11 +581,14 @@ components:
575581
not Unary Represents a logical not check. This op should have one child.
576582
The supported value types:
577583
ValueType Description
578-
"bool" Represents an Boolean type.
579-
"int" Represents an Integer type.
580-
"long" Represents a Long type.
581-
"string" Represents a String type.
582-
"date" Represents a Date type in "yyyy-mm-dd" format.
584+
"bool" Represents a Boolean type.
585+
"int" Represents an Integer type.
586+
"long" Represents a Long type.
587+
"string" Represents a String type.
588+
"date" Represents a Date type in "yyyy-mm-dd" format.
589+
"float" Represents a Float type.
590+
"double" Represents a Double type.
591+
"timestamp" Represents a timestamp in ISO8601 format, in the UTC timezone.
583592
584593
ListShareResponse:
585594
type: object
@@ -758,124 +767,6 @@ components:
758767
type: string
759768
message:
760769
type: string
761-
762-
# This is not used for the spec but comes handy for autogeneration
763-
TableMetadataResponseObject:
764-
type: object
765-
properties:
766-
protocol:
767-
# it refers to ./delta-sharing-protocol.md#protocol
768-
$ref: '#/components/schemas/ProtocolObject'
769-
metadata:
770-
# it refers to ./delta-sharing-protocol.md#metadata
771-
$ref: '#/components/schemas/MetadataObject'
772-
773-
# This is not used for the spec but comes handy for autogeneration
774-
TableQueryResponseObject:
775-
type: object
776-
properties:
777-
protocol:
778-
# it refers to ./delta-sharing-protocol.md#protocol
779-
$ref: '#/components/schemas/ProtocolObject'
780-
metadata:
781-
# it refers to ./delta-sharing-protocol.md#metadata
782-
$ref: '#/components/schemas/MetadataObject'
783-
files:
784-
type: array
785-
items:
786-
# it refers to ./delta-sharing-protocol.md#file
787-
$ref: '#/components/schemas/FileObject'
788-
FileObject:
789-
type: object
790-
properties:
791-
file:
792-
type: object
793-
properties:
794-
url:
795-
type: string
796-
id:
797-
type: string
798-
partitionValues:
799-
type: object
800-
additionalProperties:
801-
type:
802-
string
803-
size:
804-
type: integer
805-
format: int64
806-
stats:
807-
type: string
808-
version:
809-
type: integer
810-
format: int64
811-
timestamp:
812-
type: integer
813-
format: int64
814-
expirationTimestamp:
815-
type: integer
816-
format: int64
817-
required:
818-
- url
819-
- id
820-
- partitionValues
821-
- size
822-
ProtocolObject:
823-
type: object
824-
properties:
825-
protocol:
826-
type: object
827-
properties:
828-
minReaderVersion:
829-
type: integer
830-
format: int32
831-
FormatObject:
832-
type: object
833-
properties:
834-
provider:
835-
type: string
836-
required:
837-
- provider
838-
839-
MetadataObject:
840-
type: object
841-
properties:
842-
metaData:
843-
type: object
844-
properties:
845-
id:
846-
type: string
847-
name:
848-
type: string
849-
description:
850-
type: string
851-
format:
852-
$ref: '#/components/schemas/FormatObject'
853-
schemaString:
854-
type: string
855-
partitionColumns:
856-
type: array
857-
items:
858-
type: string
859-
configuration:
860-
type: object
861-
additionalProperties:
862-
type:
863-
string
864-
version:
865-
type: integer
866-
format: int64
867-
size:
868-
type: integer
869-
format: int64
870-
numFiles:
871-
type: integer
872-
format: int64
873-
required:
874-
- id
875-
- format
876-
- schemaString
877-
- partitionColumns
878-
879770
responses:
880771
"400":
881772
description: The request is malformed

server/app/src/main/java/io/whitefox/api/deltasharing/DeltaMappers.java

Lines changed: 37 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
11
package io.whitefox.api.deltasharing;
22

3+
import io.whitefox.api.deltasharing.model.v1.TableMetadataResponse;
4+
import io.whitefox.api.deltasharing.model.v1.TableQueryResponse;
35
import io.whitefox.api.deltasharing.model.v1.generated.*;
6+
import io.whitefox.api.deltasharing.model.v1.parquet.ParquetFile;
7+
import io.whitefox.api.deltasharing.model.v1.parquet.ParquetMetadata;
8+
import io.whitefox.api.deltasharing.model.v1.parquet.ParquetProtocol;
49
import io.whitefox.api.server.CommonMappers;
10+
import io.whitefox.api.server.WhitefoxMappers;
511
import io.whitefox.core.*;
612
import io.whitefox.core.Schema;
713
import io.whitefox.core.Share;
@@ -53,46 +59,46 @@ public static ReadTableRequest api2ReadTableRequest(QueryRequest request) {
5359
}
5460
}
5561

56-
public static TableQueryResponseObject readTableResult2api(ReadTableResult readTableResult) {
57-
return new TableQueryResponseObject()
58-
.metadata(metadata2Api(readTableResult.metadata()))
59-
.protocol(protocol2Api(readTableResult.protocol()))
60-
.files(readTableResult.files().stream()
61-
.map(DeltaMappers::file2Api)
62-
.collect(Collectors.toList()));
62+
public static TableQueryResponse readTableResult2api(ReadTableResult readTableResult) {
63+
return new TableQueryResponse(
64+
protocol2Api(readTableResult.protocol()),
65+
metadata2Api(readTableResult.metadata()),
66+
readTableResult.files().stream().map(DeltaMappers::file2Api).collect(Collectors.toList()));
6367
}
6468

65-
private static MetadataObject metadata2Api(Metadata metadata) {
66-
return new MetadataObject()
67-
.metaData(new MetadataObjectMetaData()
69+
private static ParquetMetadata metadata2Api(Metadata metadata) {
70+
return ParquetMetadata.builder()
71+
.metadata(ParquetMetadata.Metadata.builder()
6872
.id(metadata.id())
69-
.name(metadata.name().orElse(null))
70-
.description(metadata.description().orElse(null))
71-
.format(new FormatObject().provider(metadata.format().provider()))
73+
.name(metadata.name())
74+
.description(metadata.description())
75+
.format(WhitefoxMappers.format2api(metadata.format()))
7276
.schemaString(metadata.tableSchema().structType().toJson())
7377
.partitionColumns(metadata.partitionColumns())
74-
._configuration(metadata.configuration())
75-
.version(metadata.version().orElse(null))
76-
.numFiles(metadata.numFiles().orElse(null)));
78+
.configuration(Optional.of(metadata.configuration()))
79+
.version(metadata.version())
80+
.numFiles(metadata.numFiles())
81+
.build())
82+
.build();
7783
}
7884

79-
private static ProtocolObject protocol2Api(Protocol protocol) {
80-
return new ProtocolObject()
81-
.protocol(new ProtocolObjectProtocol()
82-
.minReaderVersion(protocol.minReaderVersion().orElse(1)));
85+
private static ParquetProtocol protocol2Api(Protocol protocol) {
86+
return ParquetProtocol.ofMinReaderVersion(protocol.minReaderVersion().orElse(1));
8387
}
8488

85-
private static FileObject file2Api(TableFile f) {
86-
return new FileObject()
87-
._file(new FileObjectFile()
89+
private static ParquetFile file2Api(TableFile f) {
90+
return ParquetFile.builder()
91+
.file(ParquetFile.File.builder()
8892
.id(f.id())
8993
.url(f.url())
9094
.partitionValues(f.partitionValues())
9195
.size(f.size())
92-
.stats(f.stats().orElse(null))
93-
.version(f.version().orElse(null))
94-
.timestamp(f.timestamp().orElse(null))
95-
.expirationTimestamp(f.expirationTimestamp()));
96+
.stats(f.stats())
97+
.version(f.version())
98+
.timestamp(f.timestamp())
99+
.expirationTimestamp(Optional.of(f.expirationTimestamp()))
100+
.build())
101+
.build();
96102
}
97103

98104
public static TableReferenceAndReadRequest api2TableReferenceAndReadRequest(
@@ -127,9 +133,9 @@ public static Map<String, String> toHeaderCapabilitiesMap(String headerCapabilit
127133
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
128134
}
129135

130-
public static TableMetadataResponseObject toTableResponseMetadata(Metadata m) {
131-
return new TableMetadataResponseObject()
132-
.protocol(new ProtocolObject().protocol(new ProtocolObjectProtocol().minReaderVersion(1)))
133-
.metadata(metadata2Api(m));
136+
public static TableMetadataResponse toTableResponseMetadata(Metadata m) {
137+
return new TableMetadataResponse(
138+
ParquetProtocol.ofMinReaderVersion(1), // smell: minReaderVersion is hard-coded to 1 here instead of being derived from the table's protocol
139+
metadata2Api(m));
134140
}
135141
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package io.whitefox.api.deltasharing.model.v1;
2+
3+
import com.fasterxml.jackson.annotation.JsonCreator;
4+
import com.fasterxml.jackson.annotation.JsonProperty;
5+
import lombok.EqualsAndHashCode;
6+
7+
@EqualsAndHashCode
8+
public class Format {
9+
private static final String PARQUET = "parquet";
10+
11+
@JsonProperty
12+
public String provider() {
13+
return PARQUET;
14+
}
15+
16+
public Format() {
17+
this(PARQUET);
18+
}
19+
20+
@JsonCreator
21+
private Format(@JsonProperty("provider") String provider) {
22+
if (!"parquet".equalsIgnoreCase(provider)) {
23+
throw new IllegalArgumentException("Provider must be " + PARQUET);
24+
}
25+
}
26+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
package io.whitefox.api.deltasharing.model.v1;
2+
3+
import io.whitefox.api.deltasharing.model.v1.parquet.ParquetMetadata;
4+
import io.whitefox.api.deltasharing.model.v1.parquet.ParquetProtocol;
5+
import lombok.NonNull;
6+
import lombok.Value;
7+
8+
@Value
9+
public class TableMetadataResponse {
10+
@NonNull ParquetProtocol protocol;
11+
12+
@NonNull ParquetMetadata metadata;
13+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
package io.whitefox.api.deltasharing.model.v1;
2+
3+
import io.whitefox.api.deltasharing.model.v1.parquet.ParquetFile;
4+
import io.whitefox.api.deltasharing.model.v1.parquet.ParquetMetadata;
5+
import io.whitefox.api.deltasharing.model.v1.parquet.ParquetProtocol;
6+
import java.util.List;
7+
import lombok.NonNull;
8+
import lombok.Value;
9+
10+
@Value
11+
public class TableQueryResponse {
12+
@NonNull ParquetProtocol protocol;
13+
14+
@NonNull ParquetMetadata metadata;
15+
16+
@NonNull List<ParquetFile> files;
17+
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
package io.whitefox.api.deltasharing.model.v1.delta;
2+
3+
import com.fasterxml.jackson.annotation.JsonInclude;
4+
import com.fasterxml.jackson.annotation.JsonProperty;
5+
import com.fasterxml.jackson.databind.JsonNode;
6+
import java.util.Optional;
7+
import lombok.Builder;
8+
import lombok.NonNull;
9+
import lombok.Value;
10+
import lombok.experimental.SuperBuilder;
11+
import lombok.extern.jackson.Jacksonized;
12+
13+
@Value
14+
@SuperBuilder
15+
@Jacksonized
16+
public class DeltaFile {
17+
18+
@JsonProperty
19+
@NonNull File file;
20+
21+
@Value
22+
@SuperBuilder
23+
@Jacksonized
24+
@JsonInclude(JsonInclude.Include.NON_ABSENT)
25+
public static class File {
26+
27+
/**
28+
* A unique string for the file in a table.
29+
* The same file is guaranteed to have the same id across multiple requests.
30+
* A client may cache the file content and use this id as a key to decide whether to use the cached file content.
31+
*/
32+
@JsonProperty
33+
@NonNull String id;
34+
35+
/**
36+
* A unique string for the deletion vector file in a table.
37+
* The same deletion vector file is guaranteed to have the same id across multiple requests.
38+
* A client may cache the file content and use this id as a key to decide whether to use the cached file content.
39+
*/
40+
@JsonProperty
41+
@Builder.Default
42+
Optional<String> deletionVectorFileId = Optional.empty();
43+
44+
/**
45+
* The table version of the file, returned when querying a table data with a version or timestamp parameter.
46+
*/
47+
@JsonProperty
48+
@Builder.Default
49+
Optional<Long> version = Optional.empty();
50+
51+
/**
52+
* The unix timestamp corresponding to the table version of the file, in milliseconds,
53+
* returned when querying a table data with a version or timestamp parameter.
54+
*/
55+
@JsonProperty
56+
@Builder.Default
57+
Optional<Long> timestamp = Optional.empty();
58+
59+
/**
60+
* The unix timestamp corresponding to the expiration of the url, in milliseconds,
61+
* returned when the server supports the feature.
62+
*/
63+
@JsonProperty
64+
@Builder.Default
65+
Optional<Long> expirationTimestamp = Optional.empty();
66+
67+
/**
68+
* Needs to be parsed by a delta library as a delta single action; the path field is replaced by a pre-signed URL.
69+
*/
70+
@JsonProperty
71+
@NonNull JsonNode deltaSingleAction;
72+
}
73+
}

0 commit comments

Comments
 (0)