@@ -416,30 +416,45 @@ def load_model(
416416 else :
417417 MODELS [model_name ]["assistant_model" ] = None
418418
419+ try :
420+ config = AutoConfig .from_pretrained (model_name , use_auth_token = hf_access_token , trust_remote_code = True \
421+ if (re .search ("chatglm" , model_name , re .IGNORECASE ) or \
422+ re .search ("qwen" , model_name , re .IGNORECASE )) else False )
423+ except ValueError as e :
424+ logging .error (f"Exception: { e } " )
425+ if "Unrecognized model in" in str (e ):
426+ raise ValueError (f"load_model: model config is not found, { e } " )
427+ else :
428+ raise ValueError (f"load_model: unknown ValueError occurred, { e } " )
429+ except EnvironmentError as e :
430+ logging .error (f"Exception: { e } " )
431+ if "not a local folder and is not a valid model identifier" in str (e ):
432+ raise ValueError (f"load_model: model name or path is not found, { e } " )
433+ else :
434+ raise ValueError (f"load_model: unknown EnvironmentError occurred, { e } " )
435+ except Exception as e :
436+ logging .error (f"Exception: { e } " )
437+ raise ValueError (f"load_model: an unexpected error occurred, { e } " )
438+
439+ MODELS [model_name ]["model_type" ] = config .model_type
440+
419441 try :
420442 tokenizer = AutoTokenizer .from_pretrained (
421443 tokenizer_name ,
422- use_fast = False if (re .search ("llama" , model_name , re .IGNORECASE )
423- or re .search ("neural-chat-7b-v2" , model_name , re .IGNORECASE )) else True ,
444+ use_fast = False if config .model_type == "llama" else True ,
424445 use_auth_token = hf_access_token ,
425446 trust_remote_code = True if (re .search ("qwen" , model_name , re .IGNORECASE ) or \
426447 re .search ("chatglm" , model_name , re .IGNORECASE )) else False ,
427448 )
428449 except EnvironmentError as e :
450+ logging .error (f"Exception: { e } " )
429451 if "not a local folder and is not a valid model identifier" in str (e ):
430- raise ValueError ("load_model: tokenizer is not found" )
431- else :
432- raise
433-
434- try :
435- config = AutoConfig .from_pretrained (model_name , use_auth_token = hf_access_token , trust_remote_code = True \
436- if (re .search ("chatglm" , model_name , re .IGNORECASE ) or \
437- re .search ("qwen" , model_name , re .IGNORECASE )) else False )
438- except ValueError as e :
439- if "Unrecognized model in" in str (e ):
440- raise ValueError ("load_model: model config is not found" )
452+ raise ValueError (f"load_model: tokenizer is not found, { e } " )
441453 else :
442- raise
454+ raise ValueError (f"load_model: unknown EnvironmentError occurred, { e } " )
455+ except Exception as e :
456+ logging .error (f"Exception: { e } " )
457+ raise ValueError (f"load_model: an unexpected error occurred, { e } " )
443458
444459 load_to_meta = model_on_meta (config )
445460
@@ -478,33 +493,26 @@ def load_model(
478493 trust_remote_code = True )
479494 elif ((
480495 re .search ("gpt" , model_name , re .IGNORECASE )
481- or re .search ("mpt" , model_name , re .IGNORECASE )
482- or re .search ("bloom" , model_name , re .IGNORECASE )
483- or re .search ("llama" , model_name , re .IGNORECASE )
484- or re .search ("magicoder" , model_name , re .IGNORECASE )
485- or re .search ("neural-chat-7b-v1" , model_name , re .IGNORECASE )
486- or re .search ("neural-chat-7b-v2" , model_name , re .IGNORECASE )
487- or re .search ("neural-chat-7b-v3" , model_name , re .IGNORECASE )
488- or re .search ("qwen" , model_name , re .IGNORECASE )
489- or re .search ("starcoder" , model_name , re .IGNORECASE )
490- or re .search ("codellama" , model_name , re .IGNORECASE )
491- or re .search ("mistral" , model_name , re .IGNORECASE )
492- or re .search ("codegen" , model_name , re .IGNORECASE )
493- ) and not ipex_int8 ) or re .search ("opt" , model_name , re .IGNORECASE ):
496+ or config .model_type == "bloom"
497+ or config .model_type == "qwen"
498+ or config .model_type == "gpt_bigcode"
499+ or config .model_type == "mpt"
500+ or config .model_type == "llama"
501+ or config .model_type == "mistral"
502+ ) and not ipex_int8 ) or config .model_type == "opt" :
494503 with smart_context_manager (use_deepspeed = use_deepspeed ):
495504 model = AutoModelForCausalLM .from_pretrained (
496505 model_name ,
497506 use_auth_token = hf_access_token ,
498507 torch_dtype = torch_dtype ,
499508 low_cpu_mem_usage = True ,
500509 quantization_config = bitsandbytes_quant_config ,
501- trust_remote_code = True if (re . search ( "qwen" , model_name , re . IGNORECASE ) or \
510+ trust_remote_code = True if (config . model_type == "qwen" or \
502511 re .search ("codegen" , model_name , re .IGNORECASE )) else False
503512 )
504513 elif (
505- (re .search ("starcoder" , model_name , re .IGNORECASE )
506- or re .search ("codellama" , model_name , re .IGNORECASE )
507- or re .search ("codegen" , model_name , re .IGNORECASE )
514+ (config .model_type == "gpt_bigcode"
515+ or config .model_type == "llama"
508516 ) and ipex_int8
509517 ):
510518 with smart_context_manager (use_deepspeed = use_deepspeed ):
@@ -520,9 +528,9 @@ def load_model(
520528 model_name ,
521529 file_name = "best_model.pt" ,
522530 )
523- elif (
524- (re . search ( "llama" , model_name , re . IGNORECASE )
525- or re . search ( "opt" , model_name , re . IGNORECASE )
531+ elif (
532+ (config . model_type == "llama"
533+ or config . model_type == "opt"
526534 or re .search ("gpt_neox" , model_name , re .IGNORECASE )
527535 or re .search ("gptj" , model_name , re .IGNORECASE )
528536 or re .search ("falcon" , model_name , re .IGNORECASE )
@@ -547,10 +555,14 @@ def load_model(
547555 raise ValueError (f"unsupported model name or path { model_name } , \
548556 only supports FLAN-T5/LLAMA/MPT/GPT/BLOOM/OPT/QWEN/NEURAL-CHAT/MISTRAL/CODELLAMA/STARCODER/CODEGEN now." )
549557 except EnvironmentError as e :
558+ logging .error (f"Exception: { e } " )
550559 if "not a local folder and is not a valid model identifier" in str (e ):
551560 raise ValueError ("load_model: model name or path is not found" )
552561 else :
553- raise
562+ raise ValueError (f"load_model: unknown EnvironmentError occurred, { e } " )
563+ except Exception as e :
564+ logging .error (f"Exception: { e } " )
565+ raise ValueError (f"load_model: an unexpected error occurred, { e } " )
554566
555567 if re .search ("llama" , model .config .architectures [0 ], re .IGNORECASE ):
556568 # unwind broken decapoda-research config
@@ -1192,6 +1204,8 @@ def predict(**params):
11921204 output = tokenizer .decode (generation_output .sequences [0 ], skip_special_tokens = True )
11931205 if "### Response:" in output :
11941206 return output .split ("### Response:" )[1 ].strip ()
1207+ if "@@ Response" in output :
1208+ return output .split ("@@ Response" )[1 ].strip ()
11951209 if "### Assistant" in output :
11961210 return output .split ("### Assistant:" )[1 ].strip ()
11971211 if "\n assistant\n " in output :
0 commit comments