@@ -160,6 +160,7 @@
 from launch.request_validation import validate_task_request
 
 DEFAULT_NETWORK_TIMEOUT_SEC = 120
+DEFAULT_LLM_COMPLETIONS_TIMEOUT = 300
 
 logger = logging.getLogger(__name__)
 logging.basicConfig()
@@ -2911,6 +2912,7 @@ def completions_sync(
         temperature: float,
         stop_sequences: Optional[List[str]] = None,
         return_token_log_probs: Optional[bool] = False,
+        timeout: float = DEFAULT_LLM_COMPLETIONS_TIMEOUT,
     ) -> CompletionSyncV1Response:
         """
         Run prompt completion on a sync LLM endpoint. Will fail if the endpoint is not sync.
@@ -2945,6 +2947,7 @@ def completions_sync(
             body=request,
             query_params=query_params,
             skip_deserialization=True,
+            timeout=timeout,
         )
         resp = json.loads(response.response.data)
         return resp
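
As a quick usage illustration: with this change, a caller can override the 300-second default on a per-request basis. In the sketch below, the LaunchClient class name, its constructor, and every parameter other than temperature, stop_sequences, return_token_log_probs, and timeout are assumptions for illustration only; the diff confirms only the timeout addition.

    # Sketch only: client construction and the non-diff parameter names
    # (endpoint_name, prompt, max_new_tokens) are assumptions.
    from launch import LaunchClient  # assumed import path

    client = LaunchClient(api_key="...")  # assumed constructor

    response = client.completions_sync(
        endpoint_name="my-llm-endpoint",
        prompt="Summarize the following text: ...",
        max_new_tokens=256,
        temperature=0.2,
        timeout=60.0,  # seconds; overrides DEFAULT_LLM_COMPLETIONS_TIMEOUT (300)
    )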
@@ -2957,6 +2960,7 @@ def completions_stream(
         temperature: float,
         stop_sequences: Optional[List[str]] = None,
         return_token_log_probs: Optional[bool] = False,
+        timeout: float = DEFAULT_LLM_COMPLETIONS_TIMEOUT,
     ) -> Iterable[CompletionStreamV1Response]:
         """
         Run prompt completion on an LLM endpoint in streaming fashion. Will fail if endpoint does not support streaming.
@@ -2989,6 +2993,7 @@ def completions_stream(
             json=request,
             auth=(self.configuration.username, self.configuration.password),
             stream=True,
+            timeout=timeout,
         )
         sse_client = sseclient.SSEClient(response)
         events = sse_client.events()
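
On the streaming path, the json=, auth=, and stream=True keywords indicate the underlying HTTP call is made with requests, so the new timeout is passed straight through to it. Note that in requests a single float timeout bounds the connection attempt and each individual read between chunks, not the total lifetime of the stream; a steady SSE stream can legitimately run longer than 300 seconds without tripping it. A hedged consumption sketch, with the same assumptions about non-diff names as above:

    import requests

    # Sketch only: endpoint_name, prompt, and max_new_tokens are assumed names.
    try:
        for event in client.completions_stream(
            endpoint_name="my-llm-endpoint",
            prompt="Write a haiku about retries.",
            max_new_tokens=64,
            temperature=1.0,
            timeout=30.0,  # requests semantics: connect + per-read, not total
        ):
            print(event)  # one CompletionStreamV1Response-shaped chunk per SSE event
    except requests.exceptions.RequestException:
        # Covers connect/read timeouts surfaced either on the initial request
        # or while iterating the streamed body.
        pass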