@@ -2898,6 +2898,8 @@ def completions_sync(
         prompt: str,
         max_new_tokens: int,
         temperature: float,
+        stop_sequences: Optional[List[str]] = None,
+        return_token_log_probs: Optional[bool] = False,
     ) -> CompletionSyncV1Response:
         """
         Run prompt completion on a sync LLM endpoint. Will fail if the endpoint is not sync.
@@ -2911,12 +2913,22 @@ def completions_sync(

             temperature: The temperature to use for sampling

+            stop_sequences: List of sequences to stop the completion at
+
+            return_token_log_probs: Whether to return the log probabilities of the tokens
+
         Returns:
             Response for prompt completion
         """
         with ApiClient(self.configuration) as api_client:
             api_instance = DefaultApi(api_client)
-            request = CompletionSyncV1Request(max_new_tokens=max_new_tokens, prompt=prompt, temperature=temperature)
+            request = CompletionSyncV1Request(
+                max_new_tokens=max_new_tokens,
+                prompt=prompt,
+                temperature=temperature,
+                stop_sequences=stop_sequences if stop_sequences is not None else [],
+                return_token_log_probs=return_token_log_probs,
+            )

             query_params = frozendict({"model_endpoint_name": endpoint_name})
             response = api_instance.create_completion_sync_task_v1_llm_completions_sync_post(  # type: ignore
                 body=request,
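A minimal usage sketch for the synchronous path (not part of the diff): it assumes `client` is an already-constructed instance of the class that defines completions_sync, and that "my-llm-endpoint" is an existing sync LLM endpoint; both names are placeholders.

# Sketch only: `client` and "my-llm-endpoint" are assumed placeholders.
response = client.completions_sync(
    endpoint_name="my-llm-endpoint",
    prompt="The capital of France is",
    max_new_tokens=16,
    temperature=0.2,
    stop_sequences=["\n"],         # generation halts at the first newline
    return_token_log_probs=True,   # request per-token log probabilities
)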
@@ -2932,6 +2944,8 @@ def completions_stream(
         prompt: str,
         max_new_tokens: int,
         temperature: float,
+        stop_sequences: Optional[List[str]] = None,
+        return_token_log_probs: Optional[bool] = False,
     ) -> Iterable[CompletionStreamV1Response]:
         """
         Run prompt completion on an LLM endpoint in streaming fashion. Will fail if endpoint does not support streaming.
@@ -2945,10 +2959,20 @@ def completions_stream(

             temperature: The temperature to use for sampling

+            stop_sequences: List of sequences to stop the completion at
+
+            return_token_log_probs: Whether to return the log probabilities of the tokens
+
         Returns:
             Iterable responses for prompt completion
         """
-        request = {"max_new_tokens": max_new_tokens, "prompt": prompt, "temperature": temperature}
+        request = {
+            "max_new_tokens": max_new_tokens,
+            "prompt": prompt,
+            "temperature": temperature,
+            "stop_sequences": stop_sequences,
+            "return_token_log_probs": return_token_log_probs,
+        }
         response = requests.post(
             url=f"{self.configuration.host}/v1/llm/completions-stream?model_endpoint_name={endpoint_name}",
             json=request,
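For the streaming variant, a comparable sketch with the same placeholder names; the fields carried by each streamed chunk are not shown in this diff, so the loop body is left generic.

# Sketch only: `client` and "my-llm-endpoint" are assumed placeholders.
for chunk in client.completions_stream(
    endpoint_name="my-llm-endpoint",
    prompt="Write a haiku about the sea:",
    max_new_tokens=64,
    temperature=0.7,
    stop_sequences=["###"],        # optional stop marker
    return_token_log_probs=False,
):
    print(chunk)  # each chunk is a CompletionStreamV1Response

One difference worth noting between the two paths: the sync path substitutes an empty list when stop_sequences is None before building CompletionSyncV1Request, while the streaming path forwards None as-is in the JSON request body.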