scaleapi
diff --git a/‎docs/api/llms.md‎
Lines changed: 42 additions & 0 deletions b/‎docs/api/llms.md‎
Lines changed: 42 additions & 0 deletions
diff --git a/‎launch/client.py‎
Lines changed: 1 addition & 1 deletion b/‎launch/client.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎mkdocs.yml‎
Lines changed: 1 addition & 0 deletions b/‎mkdocs.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎tests/test_docs.py‎
Lines changed: 24 additions & 1 deletion b/‎tests/test_docs.py‎
Lines changed: 24 additions & 1 deletion
@@ -0,0 +1,42 @@
+# LLM APIs
+
+We provide APIs to conveniently create, list, and run inference with LLMs. Under the hood, they are Launch model endpoints.
+
+## Example
+
+```py title="LLM APIs Usage"
+import os
+
+from rich import print
+
+from launch import LaunchClient
+from launch.api_client.model.llm_inference_framework import (
+ LLMInferenceFramework,
+)
+from launch.api_client.model.llm_source import LLMSource
+
+client = LaunchClient(api_key=os.getenv("LAUNCH_API_KEY"), endpoint=os.getenv("LAUNCH_ENDPOINT"))
+
+endpoints = client.list_llm_model_endpoints()
+
+print(endpoints)
+
+endpoint_name = "test-flan-t5-xxl"
+client.create_llm_model_endpoint(
+ endpoint_name=endpoint_name,
+ model_name="flan-t5-xxl",
+ source=LLMSource.HUGGING_FACE,
+ inference_framework=LLMInferenceFramework.DEEPSPEED,
+ inference_framework_image_tag=os.getenv("INFERENCE_FRAMEWORK_IMAGE_TAG"),
+ num_shards=4,
+ min_workers=1,
+ max_workers=1,
+ gpus=4,
+ endpoint_type="sync",
+)
+
+# Wait for the endpoint to be ready
+
+output = client.completion_sync(endpoint_name, prompts=["What is Deep Learning?"], max_new_tokens=10, temperature=0)
+print(output)
+```
@@ -2699,7 +2699,7 @@ def create_llm_model_endpoint(
  labels: An optional dictionary of key/value pairs to associate with this endpoint.
 
  Returns:
-  A Endpoint object that can be used to make requests to the endpoint.
+ An Endpoint object that can be used to make requests to the endpoint.
 
  """
  existing_endpoint = self.get_model_endpoint(endpoint_name)
 
@@ -44,6 +44,7 @@ nav:
  - concepts/callbacks.md
  - 'API Documentation':
  - api/client.md
+ - api/llms.md
  - api/model_bundles.md
  - api/model_endpoints.md
  - api/endpoint_predictions.md
 
@@ -7,8 +7,11 @@
 import pytest
 from _pytest.assertion.rewrite import AssertionRewritingHook
 
+from launch.api_client.model.completion_sync_v1_response import (
+ CompletionSyncV1Response,
+)
 from launch.model_bundle import ModelBundle
-from launch.model_endpoint import AsyncEndpoint, ModelEndpoint
+from launch.model_endpoint import AsyncEndpoint, ModelEndpoint, SyncEndpoint
 
 ROOT_DIR = Path(__file__).parent.parent
 
@@ -98,6 +101,20 @@ def mock_batch_job():
  return {"job_id": "test-batch-job", "status": "SUCCESS"}
 
 
+@pytest.fixture
+def mock_list_llm_model_endpoints():
+ mock = Mock(spec=SyncEndpoint)
+ mock.model_endpoint = Mock(spec=ModelEndpoint)
+ mock.model_endpoint.id = "test-endpoint"
+ mock.status = Mock(return_value="READY")
+ return [mock]
+
+
+@pytest.fixture
+def mock_completion_sync_response():
+ return CompletionSyncV1Response(status="SUCCESS", outputs=["Deep learning is a subnet of machine learning."])
+
+
 @pytest.mark.parametrize("module_name,source_code", generate_code_chunks("launch", "docs"))
 def test_docs_examples(
  module_name,
@@ -108,6 +125,7 @@ def test_docs_examples(
  mock_model_bundle,
  mock_async_endpoint,
  mock_batch_job,
+ mock_list_llm_model_endpoints,
 ):
  mocker.patch("launch.connection.Connection", MagicMock())
  mocker.patch("launch.client.DefaultApi", MagicMock())
@@ -120,6 +138,11 @@ def test_docs_examples(
  mocker.patch("launch.client.LaunchClient.create_model_bundle", MagicMock(return_value=mock_model_bundle))
  mocker.patch("launch.client.LaunchClient.create_model_endpoint", MagicMock(return_value=mock_async_endpoint))
  mocker.patch("launch.client.LaunchClient.get_batch_async_response", MagicMock(return_value=mock_batch_job))
+ mocker.patch(
+ "launch.client.LaunchClient.list_llm_model_endpoints", MagicMock(return_value=mock_list_llm_model_endpoints)
+ )
+ mocker.patch("launch.client.LaunchClient.create_llm_model_endpoint", MagicMock(return_value=mock_async_endpoint))
+ mocker.patch("launch.client.LaunchClient.completion_sync", MagicMock(return_value=mock_batch_job))
  mocker.patch("launch.client.Connection.make_request", MagicMock(return_value=mock_dictionary))
  mocker.patch("launch.client.requests", MagicMock())
  mocker.patch("pydantic.BaseModel.parse_raw", MagicMock())