@@ -5,6 +5,7 @@
 from typing import AsyncIterator

 import anyio
+import vllm
 from fastapi import APIRouter, Depends, status
 from fastapi import HTTPException, Request
 from loguru import logger
@@ -38,6 +39,7 @@
 )

 chat_router = APIRouter(prefix="/chat")
+vllm_version = vllm.__version__


 def get_engine():
@@ -105,17 +107,16 @@ async def create_chat_completion(
     try:
         from vllm.model_executor.guided_decoding import get_guided_decoding_logits_processor

-        decoding_config = await engine.model.get_decoding_config()
-
-        try:
+        if vllm_version >= "0.4.3":
+            decoding_config = await engine.model.get_decoding_config()
             guided_decode_logits_processor = (
                 await get_guided_decoding_logits_processor(
                     request.guided_decoding_backend or decoding_config.guided_decoding_backend,
                     request,
                     engine.tokenizer,
                 )
             )
-        except TypeError:
+        else:
             guided_decode_logits_processor = (
                 await get_guided_decoding_logits_processor(
                     request,
@@ -128,7 +129,7 @@ async def create_chat_completion(
     except ImportError:
         pass

-    try:
+    if vllm_version >= "0.4.3":
         result_generator = engine.model.generate(
             {
                 "prompt": prompt if isinstance(prompt, str) else None,
@@ -138,7 +139,7 @@ async def create_chat_completion(
             request_id,
             lora_request,
         )
-    except TypeError:
+    else:
         result_generator = engine.model.generate(
             prompt if isinstance(prompt, str) else None,
             sampling_params,
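One caveat with the gating above: vllm_version >= "0.4.3" compares version strings lexicographically, which happens to work for these releases but inverts once a component reaches two digits (as strings, "0.10.0" sorts before "0.4.3"). A minimal sketch of a more robust check, assuming the packaging library is available; the VLLM_VERSION_GE_043 name is hypothetical, not part of this change:

from packaging.version import Version

import vllm

# Version() parses the string into numeric components, so
# Version("0.10.0") > Version("0.4.3") compares as expected,
# unlike the plain string comparison used in the diff above.
VLLM_VERSION_GE_043 = Version(vllm.__version__) >= Version("0.4.3")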