Skip to content

Commit 9b48d24

Browse files
gcf-owl-bot[bot] and copybara-github
authored and committed
Copybara import of the project:
-- 73bda4c by Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>: feat: Introduce RagFileMetadataConfig for importing metadata to Rag PiperOrigin-RevId: 770274285 Source-Link: googleapis/googleapis@4cdc2aa Source-Link: googleapis/googleapis-gen@9dbe3e0 Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiOWRiZTNlMGYyZGM5NTljYzI2YzU5NGM3NGE5YTkzZTQwOGMxNThiNiJ9 -- 08e37c3 by Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>: feat: add EncryptionSpec field for RagCorpus CMEK feature to v1 PiperOrigin-RevId: 770837205 Source-Link: googleapis/googleapis@3a45aa3 Source-Link: googleapis/googleapis-gen@6d6b54f Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiNmQ2YjU0ZmMzZTExYmQ3OWM1MjBhNGRmOGNiMTU2MWEyYzZhYTE0OSJ9 -- 2e54a68 by Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>: 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md COPYBARA_INTEGRATE_REVIEW=#5422 from googleapis:owl-bot-copy 8660dc9 PiperOrigin-RevId: 771240575
1 parent d69ef6b commit 9b48d24

File tree

14 files changed

+202
-28
lines changed

14 files changed

+202
-28
lines changed

google/cloud/aiplatform_v1/services/migration_service/client.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -265,40 +265,40 @@ def parse_dataset_path(path: str) -> Dict[str, str]:
265265
@staticmethod
266266
def dataset_path(
267267
project: str,
268-
location: str,
269268
dataset: str,
270269
) -> str:
271270
"""Returns a fully-qualified dataset string."""
272-
return "projects/{project}/locations/{location}/datasets/{dataset}".format(
271+
return "projects/{project}/datasets/{dataset}".format(
273272
project=project,
274-
location=location,
275273
dataset=dataset,
276274
)
277275

278276
@staticmethod
279277
def parse_dataset_path(path: str) -> Dict[str, str]:
280278
"""Parses a dataset path into its component segments."""
281-
m = re.match(
282-
r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/datasets/(?P<dataset>.+?)$",
283-
path,
284-
)
279+
m = re.match(r"^projects/(?P<project>.+?)/datasets/(?P<dataset>.+?)$", path)
285280
return m.groupdict() if m else {}
286281

287282
@staticmethod
288283
def dataset_path(
289284
project: str,
285+
location: str,
290286
dataset: str,
291287
) -> str:
292288
"""Returns a fully-qualified dataset string."""
293-
return "projects/{project}/datasets/{dataset}".format(
289+
return "projects/{project}/locations/{location}/datasets/{dataset}".format(
294290
project=project,
291+
location=location,
295292
dataset=dataset,
296293
)
297294

298295
@staticmethod
299296
def parse_dataset_path(path: str) -> Dict[str, str]:
300297
"""Parses a dataset path into its component segments."""
301-
m = re.match(r"^projects/(?P<project>.+?)/datasets/(?P<dataset>.+?)$", path)
298+
m = re.match(
299+
r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/datasets/(?P<dataset>.+?)$",
300+
path,
301+
)
302302
return m.groupdict() if m else {}
303303

304304
@staticmethod

google/cloud/aiplatform_v1/services/vertex_rag_data_service/async_client.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from google.api_core import operation as gac_operation # type: ignore
4949
from google.api_core import operation_async # type: ignore
5050
from google.cloud.aiplatform_v1.services.vertex_rag_data_service import pagers
51+
from google.cloud.aiplatform_v1.types import encryption_spec
5152
from google.cloud.aiplatform_v1.types import io
5253
from google.cloud.aiplatform_v1.types import operation as gca_operation
5354
from google.cloud.aiplatform_v1.types import vertex_rag_data

google/cloud/aiplatform_v1/services/vertex_rag_data_service/client.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
from google.api_core import operation as gac_operation # type: ignore
6565
from google.api_core import operation_async # type: ignore
6666
from google.cloud.aiplatform_v1.services.vertex_rag_data_service import pagers
67+
from google.cloud.aiplatform_v1.types import encryption_spec
6768
from google.cloud.aiplatform_v1.types import io
6869
from google.cloud.aiplatform_v1.types import operation as gca_operation
6970
from google.cloud.aiplatform_v1.types import vertex_rag_data

google/cloud/aiplatform_v1/types/tuning_job.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -529,9 +529,13 @@ class SupervisedTuningSpec(proto.Message):
529529
530530
Attributes:
531531
training_dataset_uri (str):
532-
Required. Training dataset used for tuning. The dataset can be specified as either a Cloud Storage path to a JSONL file or as the resource name of a Vertex Multimodal Dataset.
532+
Required. Cloud Storage path to file
533+
containing training dataset for tuning. The
534+
dataset must be formatted as a JSONL file.
533535
validation_dataset_uri (str):
534-
Optional. Validation dataset used for tuning. The dataset can be specified as either a Cloud Storage path to a JSONL file or as the resource name of a Vertex Multimodal Dataset.
536+
Optional. Cloud Storage path to file
537+
containing validation dataset for tuning. The
538+
dataset must be formatted as a JSONL file.
535539
hyper_parameters (google.cloud.aiplatform_v1.types.SupervisedHyperParameters):
536540
Optional. Hyperparameters for SFT.
537541
export_last_checkpoint_only (bool):

google/cloud/aiplatform_v1/types/vertex_rag_data.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import proto # type: ignore
2121

2222
from google.cloud.aiplatform_v1.types import api_auth as gca_api_auth
23+
from google.cloud.aiplatform_v1.types import encryption_spec as gca_encryption_spec
2324
from google.cloud.aiplatform_v1.types import io
2425
from google.protobuf import timestamp_pb2 # type: ignore
2526

@@ -414,6 +415,12 @@ class RagCorpus(proto.Message):
414415
was last updated.
415416
corpus_status (google.cloud.aiplatform_v1.types.CorpusStatus):
416417
Output only. RagCorpus state.
418+
encryption_spec (google.cloud.aiplatform_v1.types.EncryptionSpec):
419+
Optional. Immutable. The CMEK key name used
420+
to encrypt at-rest data related to this Corpus.
421+
Only applicable to RagManagedDb option for
422+
Vector DB. This field can only be set at corpus
423+
creation time, and cannot be updated or deleted.
417424
"""
418425

419426
vector_db_config: "RagVectorDbConfig" = proto.Field(
@@ -455,6 +462,11 @@ class RagCorpus(proto.Message):
455462
number=8,
456463
message="CorpusStatus",
457464
)
465+
encryption_spec: gca_encryption_spec.EncryptionSpec = proto.Field(
466+
proto.MESSAGE,
467+
number=12,
468+
message=gca_encryption_spec.EncryptionSpec,
469+
)
458470

459471

460472
class RagFile(proto.Message):

google/cloud/aiplatform_v1beta1/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1202,6 +1202,7 @@
12021202
from .types.vertex_rag_data import RagEngineConfig
12031203
from .types.vertex_rag_data import RagFile
12041204
from .types.vertex_rag_data import RagFileChunkingConfig
1205+
from .types.vertex_rag_data import RagFileMetadataConfig
12051206
from .types.vertex_rag_data import RagFileParsingConfig
12061207
from .types.vertex_rag_data import RagFileTransformationConfig
12071208
from .types.vertex_rag_data import RagManagedDbConfig
@@ -2128,6 +2129,7 @@
21282129
"RagEngineConfig",
21292130
"RagFile",
21302131
"RagFileChunkingConfig",
2132+
"RagFileMetadataConfig",
21312133
"RagFileParsingConfig",
21322134
"RagFileTransformationConfig",
21332135
"RagManagedDbConfig",

google/cloud/aiplatform_v1beta1/types/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1355,6 +1355,7 @@
13551355
RagEngineConfig,
13561356
RagFile,
13571357
RagFileChunkingConfig,
1358+
RagFileMetadataConfig,
13581359
RagFileParsingConfig,
13591360
RagFileTransformationConfig,
13601361
RagManagedDbConfig,
@@ -2499,6 +2500,7 @@
24992500
"RagEngineConfig",
25002501
"RagFile",
25012502
"RagFileChunkingConfig",
2503+
"RagFileMetadataConfig",
25022504
"RagFileParsingConfig",
25032505
"RagFileTransformationConfig",
25042506
"RagManagedDbConfig",

google/cloud/aiplatform_v1beta1/types/tuning_job.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -764,9 +764,14 @@ class SupervisedTuningSpec(proto.Message):
764764
765765
Attributes:
766766
training_dataset_uri (str):
767-
Required. Training dataset used for tuning. The dataset can be specified as either a Cloud Storage path to a JSONL file or as the resource name of a Vertex Multimodal Dataset.
767+
Required. Cloud Storage path to file
768+
containing training dataset for tuning. The
769+
dataset must be formatted as a JSONL file.
768770
validation_dataset_uri (str):
769-
Optional. Validation dataset used for tuning. The dataset can be specified as either a Cloud Storage path to a JSONL file or as the resource name of a Vertex Multimodal Dataset.a1.types.SupervisedHyperParameters):
771+
Optional. Cloud Storage path to file
772+
containing validation dataset for tuning. The
773+
dataset must be formatted as a JSONL file.
774+
hyper_parameters (google.cloud.aiplatform_v1beta1.types.SupervisedHyperParameters):
770775
Optional. Hyperparameters for SFT.
771776
export_last_checkpoint_only (bool):
772777
Optional. If set to true, disable

google/cloud/aiplatform_v1beta1/types/vertex_rag_data.py

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
"RagFileChunkingConfig",
4040
"RagFileTransformationConfig",
4141
"RagFileParsingConfig",
42+
"RagFileMetadataConfig",
4243
"UploadRagFileConfig",
4344
"ImportRagFilesConfig",
4445
"RagManagedDbConfig",
@@ -776,6 +777,9 @@ class RagFile(proto.Message):
776777
last updated.
777778
file_status (google.cloud.aiplatform_v1beta1.types.FileStatus):
778779
Output only. State of the RagFile.
780+
user_metadata (str):
781+
Output only. The metadata for metadata
782+
search. The contents will be in JSON format.
779783
"""
780784

781785
class RagFileType(proto.Enum):
@@ -865,6 +869,10 @@ class RagFileType(proto.Enum):
865869
number=13,
866870
message="FileStatus",
867871
)
872+
user_metadata: str = proto.Field(
873+
proto.STRING,
874+
number=15,
875+
)
868876

869877

870878
class RagChunk(proto.Message):
@@ -1136,6 +1144,103 @@ class LlmParser(proto.Message):
11361144
)
11371145

11381146

1147+
class RagFileMetadataConfig(proto.Message):
1148+
r"""Metadata config for RagFile.
1149+
1150+
This message has `oneof`_ fields (mutually exclusive fields).
1151+
For each oneof, at most one member field can be set at the same time.
1152+
Setting any member of the oneof automatically clears all other
1153+
members.
1154+
1155+
.. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields
1156+
1157+
Attributes:
1158+
gcs_metadata_schema_source (google.cloud.aiplatform_v1beta1.types.GcsSource):
1159+
Google Cloud Storage location. Supports importing individual
1160+
files as well as entire Google Cloud Storage directories.
1161+
Sample formats:
1162+
1163+
- ``gs://bucket_name/my_directory/object_name/metadata_schema.json``
1164+
- ``gs://bucket_name/my_directory`` If providing a
1165+
directory, the metadata schema will be read from the
1166+
files that end with "metadata_schema.json" in the
1167+
directory.
1168+
1169+
This field is a member of `oneof`_ ``metadata_schema_source``.
1170+
google_drive_metadata_schema_source (google.cloud.aiplatform_v1beta1.types.GoogleDriveSource):
1171+
Google Drive location. Supports importing individual files
1172+
as well as Google Drive folders. If providing a folder, the
1173+
metadata schema will be read from the files that end with
1174+
"metadata_schema.json" in the directory.
1175+
1176+
This field is a member of `oneof`_ ``metadata_schema_source``.
1177+
inline_metadata_schema_source (str):
1178+
Inline metadata schema source. Must be a JSON
1179+
string.
1180+
1181+
This field is a member of `oneof`_ ``metadata_schema_source``.
1182+
gcs_metadata_source (google.cloud.aiplatform_v1beta1.types.GcsSource):
1183+
Google Cloud Storage location. Supports importing individual
1184+
files as well as entire Google Cloud Storage directories.
1185+
Sample formats:
1186+
1187+
- ``gs://bucket_name/my_directory/object_name/metadata.json``
1188+
- ``gs://bucket_name/my_directory`` If providing a
1189+
directory, the metadata will be read from the files that
1190+
end with "metadata.json" in the directory.
1191+
1192+
This field is a member of `oneof`_ ``metadata_source``.
1193+
google_drive_metadata_source (google.cloud.aiplatform_v1beta1.types.GoogleDriveSource):
1194+
Google Drive location. Supports importing
1195+
individual files as well as Google Drive
1196+
folders. If providing a directory, the metadata
1197+
will be read from the files that end with
1198+
"metadata.json" in the directory.
1199+
1200+
This field is a member of `oneof`_ ``metadata_source``.
1201+
inline_metadata_source (str):
1202+
Inline metadata source. Must be a JSON
1203+
string.
1204+
1205+
This field is a member of `oneof`_ ``metadata_source``.
1206+
"""
1207+
1208+
gcs_metadata_schema_source: io.GcsSource = proto.Field(
1209+
proto.MESSAGE,
1210+
number=1,
1211+
oneof="metadata_schema_source",
1212+
message=io.GcsSource,
1213+
)
1214+
google_drive_metadata_schema_source: io.GoogleDriveSource = proto.Field(
1215+
proto.MESSAGE,
1216+
number=2,
1217+
oneof="metadata_schema_source",
1218+
message=io.GoogleDriveSource,
1219+
)
1220+
inline_metadata_schema_source: str = proto.Field(
1221+
proto.STRING,
1222+
number=3,
1223+
oneof="metadata_schema_source",
1224+
)
1225+
gcs_metadata_source: io.GcsSource = proto.Field(
1226+
proto.MESSAGE,
1227+
number=4,
1228+
oneof="metadata_source",
1229+
message=io.GcsSource,
1230+
)
1231+
google_drive_metadata_source: io.GoogleDriveSource = proto.Field(
1232+
proto.MESSAGE,
1233+
number=5,
1234+
oneof="metadata_source",
1235+
message=io.GoogleDriveSource,
1236+
)
1237+
inline_metadata_source: str = proto.Field(
1238+
proto.STRING,
1239+
number=6,
1240+
oneof="metadata_source",
1241+
)
1242+
1243+
11391244
class UploadRagFileConfig(proto.Message):
11401245
r"""Config for uploading RagFile.
11411246
@@ -1146,6 +1251,15 @@ class UploadRagFileConfig(proto.Message):
11461251
rag_file_transformation_config (google.cloud.aiplatform_v1beta1.types.RagFileTransformationConfig):
11471252
Specifies the transformation config for
11481253
RagFiles.
1254+
rag_file_metadata_config (google.cloud.aiplatform_v1beta1.types.RagFileMetadataConfig):
1255+
Specifies the metadata config for RagFiles.
1256+
Including paths for metadata schema and
1257+
metadata. Alternatively, inline metadata schema
1258+
and metadata can be provided.
1259+
rag_file_parsing_config (google.cloud.aiplatform_v1beta1.types.RagFileParsingConfig):
1260+
Optional. Specifies the parsing config for
1261+
RagFiles. RAG will use the default parser if
1262+
this field is not set.
11491263
"""
11501264

11511265
rag_file_chunking_config: "RagFileChunkingConfig" = proto.Field(
@@ -1158,6 +1272,16 @@ class UploadRagFileConfig(proto.Message):
11581272
number=3,
11591273
message="RagFileTransformationConfig",
11601274
)
1275+
rag_file_metadata_config: "RagFileMetadataConfig" = proto.Field(
1276+
proto.MESSAGE,
1277+
number=4,
1278+
message="RagFileMetadataConfig",
1279+
)
1280+
rag_file_parsing_config: "RagFileParsingConfig" = proto.Field(
1281+
proto.MESSAGE,
1282+
number=5,
1283+
message="RagFileParsingConfig",
1284+
)
11611285

11621286

11631287
class ImportRagFilesConfig(proto.Message):
@@ -1241,6 +1365,10 @@ class ImportRagFilesConfig(proto.Message):
12411365
Optional. Specifies the parsing config for
12421366
RagFiles. RAG will use the default parser if
12431367
this field is not set.
1368+
rag_file_metadata_config (google.cloud.aiplatform_v1beta1.types.RagFileMetadataConfig):
1369+
Specifies the metadata config for RagFiles.
1370+
Including paths for metadata schema and
1371+
metadata.
12441372
max_embedding_requests_per_min (int):
12451373
Optional. The max number of queries per
12461374
minute that this job is allowed to make to the
@@ -1338,6 +1466,11 @@ class ImportRagFilesConfig(proto.Message):
13381466
number=8,
13391467
message="RagFileParsingConfig",
13401468
)
1469+
rag_file_metadata_config: "RagFileMetadataConfig" = proto.Field(
1470+
proto.MESSAGE,
1471+
number=17,
1472+
message="RagFileMetadataConfig",
1473+
)
13411474
max_embedding_requests_per_min: int = proto.Field(
13421475
proto.INT32,
13431476
number=5,

samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
],
99
"language": "PYTHON",
1010
"name": "google-cloud-aiplatform",
11-
"version": "1.97.0"
11+
"version": "0.1.0"
1212
},
1313
"snippets": [
1414
{

0 commit comments

Comments
 (0)