googleapis
diff --git a/‎tests/unit/aiplatform/test_language_models.py‎
Lines changed: 1 addition & 201 deletions b/‎tests/unit/aiplatform/test_language_models.py‎
Lines changed: 1 addition & 201 deletions
@@ -101,18 +101,7 @@
  "prediction_schema_uri": "gs://google-cloud-aiplatform/schema/predict/prediction/text_generation_1.0.0.yaml",
  },
 }
-_TEXT_GECKO_PUBLISHER_MODEL_DICT = {
- "name": "publishers/google/models/textembedding-gecko",
- "version_id": "003",
- "open_source_category": "PROPRIETARY",
- "launch_stage": gca_publisher_model.PublisherModel.LaunchStage.GA,
- "publisher_model_template": "projects/{user-project}/locations/{location}/publishers/google/models/textembedding-gecko@003",
- "predict_schemata": {
- "instance_schema_uri": "gs://google-cloud-aiplatform/schema/predict/instance/text_embedding_1.0.0.yaml",
- "parameters_schema_uri": "gs://google-cloud-aiplatfrom/schema/predict/params/text_embedding_1.0.0.yaml",
- "prediction_schema_uri": "gs://google-cloud-aiplatform/schema/predict/prediction/text_embedding_1.0.0.yaml",
- },
-}
+
 _CHAT_BISON_PUBLISHER_MODEL_DICT = {
  "name": "publishers/google/models/chat-bison",
  "version_id": "001",
@@ -539,105 +528,6 @@ def reverse_string_2(s):""",
  },
 )
 
-_EMBEDING_MODEL_TUNING_PIPELINE_SPEC = {
- "components": {},
- "deploymentSpec": {},
- "pipelineInfo": {
- "description": "Pipeline definition for v1.1.x embedding tuning pipelines.",
- "name": "tune-text-embedding-model",
- },
- "root": {
- "dag": {"tasks": {}},
- "inputDefinitions": {
- "parameters": {
- "accelerator_count": {
- "defaultValue": 4,
- "description": "how many accelerators to use when running the\ncontainer.",
- "isOptional": True,
- "parameterType": "NUMBER_INTEGER",
- },
- "accelerator_type": {
- "defaultValue": "NVIDIA_TESLA_V100",
- "description": "the accelerator type for running the trainer component.",
- "isOptional": True,
- "parameterType": "STRING",
- },
- "base_model_version_id": {
- "defaultValue": "textembedding-gecko@001",
- "description": "which base model to tune. This may be any stable\nnumbered version, for example `textembedding-gecko@001`.",
- "isOptional": True,
- "parameterType": "STRING",
- },
- "batch_size": {
- "defaultValue": 128,
- "description": "training batch size.",
- "isOptional": True,
- "parameterType": "NUMBER_INTEGER",
- },
- "corpus_path": {
- "description": "the GCS path to the corpus data location.",
- "parameterType": "STRING",
- },
- "iterations": {
- "defaultValue": 1000,
- "description": "the number of steps to perform fine-tuning.",
- "isOptional": True,
- "parameterType": "NUMBER_INTEGER",
- },
- "location": {
- "defaultValue": "us-central1",
- "description": "GCP region to run the pipeline.",
- "isOptional": True,
- "parameterType": "STRING",
- },
- "machine_type": {
- "defaultValue": "n1-standard-16",
- "description": "the type of the machine to run the trainer component. For\nmore details about this input config, see:\nhttps://cloud.google.com/vertex-ai/docs/training/configure-compute.",
- "isOptional": True,
- "parameterType": "STRING",
- },
- "model_display_name": {
- "defaultValue": "tuned-text-embedding-model",
- "description": "output model display name.",
- "isOptional": True,
- "parameterType": "STRING",
- },
- "project": {
- "description": "user's project id.",
- "parameterType": "STRING",
- },
- "queries_path": {
- "description": "the GCS path to the queries location.",
- "parameterType": "STRING",
- },
- "task_type": {
- "defaultValue": "DEFAULT",
- "description": "the task type expected to be used during inference. Valid\nvalues are `DEFAULT`, `RETRIEVAL_QUERY`, `RETRIEVAL_DOCUMENT`,\n`SEMANTIC_SIMILARITY`, `CLASSIFICATION`, and `CLUSTERING`.",
- "isOptional": True,
- "parameterType": "STRING",
- },
- "test_label_path": {
- "defaultValue": "",
- "description": "the GCS path to the test label data location.",
- "isOptional": True,
- "parameterType": "STRING",
- },
- "train_label_path": {
- "description": "the GCS path to the train label data location.",
- "parameterType": "STRING",
- },
- "validation_label_path": {
- "defaultValue": "",
- "description": "The GCS path to the validation label data location.",
- "isOptional": True,
- "parameterType": "STRING",
- },
- }
- },
- },
- "schemaVersion": "2.1.0",
- "sdkVersion": "kfp-2.6.0",
-}
 _TEST_PIPELINE_SPEC = {
  "components": {},
  "pipelineInfo": {"name": "evaluation-llm-text-generation-pipeline"},
@@ -751,9 +641,6 @@ def reverse_string_2(s):""",
 }
 
 
-_EMBEDING_MODEL_TUNING_PIPELINE_SPEC_JSON = json.dumps(
- _EMBEDING_MODEL_TUNING_PIPELINE_SPEC,
-)
 _TEST_PIPELINE_SPEC_JSON = json.dumps(
  _TEST_PIPELINE_SPEC,
 )
@@ -1573,18 +1460,6 @@ def mock_request_urlopen(request: str) -> Tuple[str, mock.MagicMock]:
  yield request.param, mock_urlopen
 
 
-@pytest.fixture
-def mock_request_urlopen_gecko(request: str) -> Tuple[str, mock.MagicMock]:
- data = _EMBEDING_MODEL_TUNING_PIPELINE_SPEC
- with mock.patch.object(urllib_request, "urlopen") as mock_urlopen:
- mock_read_response = mock.MagicMock()
- mock_decode_response = mock.MagicMock()
- mock_decode_response.return_value = json.dumps(data)
- mock_read_response.return_value.decode = mock_decode_response
- mock_urlopen.return_value.read = mock_read_response
- yield request.param, mock_urlopen
-
-
 @pytest.fixture
 def mock_request_urlopen_rlhf(request: str) -> Tuple[str, mock.MagicMock]:
  data = _TEST_RLHF_PIPELINE_SPEC
@@ -1653,21 +1528,6 @@ def get_endpoint_mock():
  yield get_endpoint_mock
 
 
-@pytest.fixture
-def mock_get_tuned_embedding_model(get_endpoint_mock):
- with mock.patch.object(
- _language_models._TunableEmbeddingModelMixin, "get_tuned_model"
- ) as mock_text_generation_model:
- mock_text_generation_model.return_value._model_id = (
- test_constants.ModelConstants._TEST_MODEL_RESOURCE_NAME
- )
- mock_text_generation_model.return_value._endpoint_name = (
- test_constants.EndpointConstants._TEST_ENDPOINT_NAME
- )
- mock_text_generation_model.return_value._endpoint = get_endpoint_mock
- yield mock_text_generation_model
-
-
 @pytest.fixture
 def mock_get_tuned_model(get_endpoint_mock):
  with mock.patch.object(
@@ -2274,66 +2134,6 @@ def test_text_generation_response_repr(self):
  assert "blocked" in response_repr
  assert "Violent" in response_repr
 
- @pytest.mark.parametrize(
- "job_spec",
- [_EMBEDING_MODEL_TUNING_PIPELINE_SPEC_JSON],
- )
- @pytest.mark.parametrize(
- "mock_request_urlopen_gecko",
- ["https://us-central1-kfp.pkg.dev/proj/repo/pack/latest"],
- indirect=True,
- )
- def test_tune_text_embedding_model(
- self,
- mock_pipeline_service_create,
- mock_pipeline_job_get,
- mock_pipeline_bucket_exists,
- job_spec,
- mock_load_yaml_and_json,
- mock_gcs_from_string,
- mock_gcs_upload,
- mock_request_urlopen_gecko,
- mock_get_tuned_embedding_model,
- ):
- """Tests tuning the text embedding model."""
- aiplatform.init(
- project=_TEST_PROJECT,
- location=_TEST_LOCATION,
- encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME,
- )
- with mock.patch.object(
- target=model_garden_service_client.ModelGardenServiceClient,
- attribute="get_publisher_model",
- return_value=gca_publisher_model.PublisherModel(
- _TEXT_GECKO_PUBLISHER_MODEL_DICT
- ),
- ):
- model = language_models.TextEmbeddingModel.from_pretrained(
- "textembedding-gecko@003"
- )
- tuning_job = model.tune_model(
- training_data="gs://bucket/training.tsv",
- corpus_data="gs://bucket/corpus.jsonl",
- queries_data="gs://bucket/queries.jsonl",
- test_data="gs://bucket/test.tsv",
- tuned_model_location="us-central1",
- train_steps=10,
- accelerator="NVIDIA_TESLA_A100",
- )
- call_kwargs = mock_pipeline_service_create.call_args[1]
- pipeline_arguments = call_kwargs[
- "pipeline_job"
- ].runtime_config.parameter_values
- assert pipeline_arguments["iterations"] == 10
- assert pipeline_arguments["accelerator_type"] == "NVIDIA_TESLA_A100"
-
- # Testing the tuned model
- tuned_model = tuning_job.get_tuned_model()
- assert (
- tuned_model._endpoint_name
- == test_constants.EndpointConstants._TEST_ENDPOINT_NAME
- )
-
  @pytest.mark.parametrize(
  "job_spec",
  [_TEST_PIPELINE_SPEC_JSON, _TEST_PIPELINE_JOB],