This repository was archived by the owner on Oct 25, 2024. It is now read-only.

Commit f29c1ec

[NeuralChat] Support magicoder model (#1067)
* Support magicoder model and refine load model. Signed-off-by: lvliang-intel <liang1.lv@intel.com>
1 parent: 3e14b05 · commit: f29c1ec

5 files changed: +89 additions, −43 deletions

intel_extension_for_transformers/llm/quantization/optimization.py

Lines changed: 4 additions & 3 deletions

@@ -52,10 +52,11 @@ def optimize(self, model, use_llm_runtime=False):
             or re.search("bloom", model_name, re.IGNORECASE)
             or re.search("llama", model_name, re.IGNORECASE)
             or re.search("opt", model_name, re.IGNORECASE)
-            or re.search("neural-chat-7b-v1", model_name, re.IGNORECASE)
-            or re.search("neural-chat-7b-v2", model_name, re.IGNORECASE)
-            or re.search("neural-chat-7b-v3", model_name, re.IGNORECASE)
+            or re.search("neural-chat", model_name, re.IGNORECASE)
             or re.search("starcoder", model_name, re.IGNORECASE)
+            or re.search("codegen", model_name, re.IGNORECASE)
+            or re.search("mistral", model_name, re.IGNORECASE)
+            or re.search("magicoder", model_name, re.IGNORECASE)
             or re.search("solar", model_name, re.IGNORECASE)
         ):
             from intel_extension_for_transformers.transformers import AutoModelForCausalLM
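This check gates the extension's optimization path purely on the model name: any of the listed substrings (now including codegen, mistral, and magicoder, with the three neural-chat-7b variants collapsed into a single neural-chat pattern) routes the model through the extension's AutoModelForCausalLM. A minimal standalone sketch of the same pattern, with a hypothetical helper name, an illustrative model id, and the pattern list abridged to the names visible in this hunk:

import re

NAME_PATTERNS = [
    "bloom", "llama", "opt", "neural-chat", "starcoder",
    "codegen", "mistral", "magicoder", "solar",
]

def matches_supported_model(model_name: str) -> bool:
    # Case-insensitive substring match, mirroring the re.search chain above.
    return any(re.search(p, model_name, re.IGNORECASE) for p in NAME_PATTERNS)

print(matches_supported_model("ise-uiuc/Magicoder-S-DS-6.7B"))  # True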

intel_extension_for_transformers/neural_chat/chatbot.py

Lines changed: 10 additions & 2 deletions

@@ -69,7 +69,7 @@ def build_chatbot(config: PipelineConfig=None):
         return

     # create model adapter
-    if "llama" in config.model_name_or_path.lower() or "magicoder" in config.model_name_or_path.lower():
+    if "llama" in config.model_name_or_path.lower():
         from .models.llama_model import LlamaModel
         adapter = LlamaModel()
     elif "mpt" in config.model_name_or_path.lower():
@@ -95,7 +95,8 @@ def build_chatbot(config: PipelineConfig=None):
         "flan-t5" in config.model_name_or_path.lower() or \
         "bloom" in config.model_name_or_path.lower() or \
         "starcoder" in config.model_name_or_path.lower() or \
-        "codegen" in config.model_name_or_path.lower():
+        "codegen" in config.model_name_or_path.lower() or \
+        "magicoder" in config.model_name_or_path.lower():
         from .models.base_model import BaseModel
         adapter = BaseModel()
     else:
@@ -163,6 +164,7 @@ def build_chatbot(config: PipelineConfig=None):
     try:
         adapter.load_model(parameters)
     except RuntimeError as e:
+        logger.error(f"Exception: {e}")
         if "out of memory" in str(e):
             set_latest_error(ErrorCodes.ERROR_OUT_OF_MEMORY)
         elif "devices are busy or unavailable" in str(e):
@@ -173,6 +175,7 @@ def build_chatbot(config: PipelineConfig=None):
             set_latest_error(ErrorCodes.ERROR_GENERIC)
         return
     except ValueError as e:
+        logger.error(f"Exception: {e}")
         if "load_model: unsupported device" in str(e):
             set_latest_error(ErrorCodes.ERROR_DEVICE_NOT_SUPPORTED)
         elif "load_model: unsupported model" in str(e):
@@ -187,6 +190,7 @@ def build_chatbot(config: PipelineConfig=None):
             set_latest_error(ErrorCodes.ERROR_GENERIC)
         return
     except Exception as e:
+        logger.error(f"Exception: {e}")
         set_latest_error(ErrorCodes.ERROR_GENERIC)
         return
     return adapter
@@ -204,14 +208,17 @@ def finetune_model(config: BaseFinetuningConfig):
     try:
         finetuning.finetune()
     except FileNotFoundError as e:
+        logger.error(f"Exception: {e}")
         if "Couldn't find a dataset script" in str(e):
             set_latest_error(ErrorCodes.ERROR_DATASET_NOT_FOUND)
     except ValueError as e:
+        logger.error(f"Exception: {e}")
         if "--do_eval requires a validation dataset" in str(e):
             set_latest_error(ErrorCodes.ERROR_VALIDATION_FILE_NOT_FOUND)
         elif "--do_train requires a train dataset" in str(e):
             set_latest_error(ErrorCodes.ERROR_TRAIN_FILE_NOT_FOUND)
     except Exception as e:
+        logger.error(f"Exception: {e}")
         if config.finetune_args.peft == "lora":
             set_latest_error(ErrorCodes.ERROR_LORA_FINETUNE_FAIL)
         elif config.finetune_args.peft == "llama_adapter":
@@ -237,6 +244,7 @@ def optimize_model(model, config, use_llm_runtime=False):
     try:
         model = optimization.optimize(model, use_llm_runtime)
     except Exception as e:
+        logger.error(f"Exception: {e}")
         from intel_extension_for_transformers.transformers import (
             MixedPrecisionConfig,
             WeightOnlyQuantConfig,
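With this routing change a Magicoder checkpoint no longer goes through LlamaModel but falls through to the generic BaseModel adapter, and every load/finetune/optimize failure is now logged before an error code is set. A minimal usage sketch, assuming the public NeuralChat entry points and an illustrative Hugging Face model id:

from intel_extension_for_transformers.neural_chat import PipelineConfig, build_chatbot

# "magicoder" in the path selects the BaseModel adapter added above.
config = PipelineConfig(model_name_or_path="ise-uiuc/Magicoder-S-DS-6.7B")
chatbot = build_chatbot(config)
print(chatbot.predict("Write a Python function that checks whether a string is a palindrome."))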

intel_extension_for_transformers/neural_chat/models/base_model.py

Lines changed: 16 additions & 2 deletions

@@ -24,6 +24,7 @@
 from ..utils.common import is_audio_file
 from .model_utils import load_model, predict, predict_stream, MODELS
 from ..prompts import PromptTemplate
+from ..prompts.prompt import MAGICODER_PROMPT
 from ..utils.error_utils import set_latest_error
 from ..errorcode import ErrorCodes
 import logging
@@ -163,7 +164,7 @@ def predict_stream(self, query, origin_query="", config=None):
             self.get_conv_template(self.model_name, config.task)
         if (self.conv_template.roles[0] in query and self.conv_template.roles[1] in query) or \
             "starcoder" in self.model_name.lower() or "codellama" in self.model_name.lower() or \
-            "codegen" in self.model_name.lower():
+            "codegen" in self.model_name.lower() or "magicoder" in self.model_name.lower():
             query_include_prompt = True

         # plugin pre actions
@@ -207,6 +208,16 @@ def predict_stream(self, query, origin_query="", config=None):
         if not query_include_prompt and not is_plugin_enabled("retrieval"):
             query = self.prepare_prompt(query, self.model_name, config.task)

+        # Phind/Phind-CodeLlama-34B-v2 model accpects Alpaca/Vicuna instruction format.
+        if "phind" in self.model_name.lower():
+            conv_template = PromptTemplate(name="phind")
+            conv_template.append_message(conv_template.roles[0], query)
+            conv_template.append_message(conv_template.roles[1], None)
+            query = conv_template.get_prompt()
+
+        if "magicoder" in self.model_name.lower():
+            query = MAGICODER_PROMPT.format(instruction=query)
+
         try:
             response = predict_stream(
                 **construct_parameters(query, self.model_name, self.device, self.assistant_model, config))
@@ -256,7 +267,7 @@ def predict(self, query, origin_query="", config=None):
             self.get_conv_template(self.model_name, config.task)
         if (self.conv_template.roles[0] in query and self.conv_template.roles[1] in query) or \
             "starcoder" in self.model_name.lower() or "codellama" in self.model_name.lower() or \
-            "codegen" in self.model_name.lower():
+            "codegen" in self.model_name.lower() or "magicoder" in self.model_name.lower():
             query_include_prompt = True

         # plugin pre actions
@@ -298,6 +309,9 @@ def predict(self, query, origin_query="", config=None):
             conv_template.append_message(conv_template.roles[1], None)
             query = conv_template.get_prompt()

+        if "magicoder" in self.model_name.lower():
+            query = MAGICODER_PROMPT.format(instruction=query)
+
         # LLM inference
         try:
             response = predict(
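In both predict paths the raw instruction is wrapped in the Magicoder template immediately before inference. A minimal sketch of that wrapping step, assuming MAGICODER_PROMPT as defined in prompts/prompt.py and an illustrative model name:

from intel_extension_for_transformers.neural_chat.prompts.prompt import MAGICODER_PROMPT

model_name = "ise-uiuc/Magicoder-S-DS-6.7B"
query = "Implement binary search in Python."
if "magicoder" in model_name.lower():
    # Same wrapping as in BaseModel.predict()/predict_stream() above.
    query = MAGICODER_PROMPT.format(instruction=query)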

intel_extension_for_transformers/neural_chat/models/model_utils.py

Lines changed: 49 additions & 35 deletions

@@ -416,30 +416,45 @@ def load_model(
     else:
         MODELS[model_name]["assistant_model"] = None

+    try:
+        config = AutoConfig.from_pretrained(model_name, use_auth_token=hf_access_token, trust_remote_code=True \
+                                            if (re.search("chatglm", model_name, re.IGNORECASE) or \
+                                            re.search("qwen", model_name, re.IGNORECASE)) else False)
+    except ValueError as e:
+        logging.error(f"Exception: {e}")
+        if "Unrecognized model in" in str(e):
+            raise ValueError(f"load_model: model config is not found, {e}")
+        else:
+            raise ValueError(f"load_model: unknown ValueError occurred, {e}")
+    except EnvironmentError as e:
+        logging.error(f"Exception: {e}")
+        if "not a local folder and is not a valid model identifier" in str(e):
+            raise ValueError(f"load_model: model name or path is not found, {e}")
+        else:
+            raise ValueError(f"load_model: unknown EnvironmentError occurred, {e}")
+    except Exception as e:
+        logging.error(f"Exception: {e}")
+        raise ValueError(f"load_model: an unexpected error occurred, {e}")
+
+    MODELS[model_name]["model_type"] = config.model_type
+
     try:
         tokenizer = AutoTokenizer.from_pretrained(
             tokenizer_name,
-            use_fast=False if (re.search("llama", model_name, re.IGNORECASE)
-                or re.search("neural-chat-7b-v2", model_name, re.IGNORECASE)) else True,
+            use_fast=False if config.model_type == "llama" else True,
             use_auth_token=hf_access_token,
             trust_remote_code=True if (re.search("qwen", model_name, re.IGNORECASE) or \
                 re.search("chatglm", model_name, re.IGNORECASE)) else False,
         )
     except EnvironmentError as e:
+        logging.error(f"Exception: {e}")
         if "not a local folder and is not a valid model identifier" in str(e):
-            raise ValueError("load_model: tokenizer is not found")
-        else:
-            raise
-
-    try:
-        config = AutoConfig.from_pretrained(model_name, use_auth_token=hf_access_token, trust_remote_code=True \
-            if (re.search("chatglm", model_name, re.IGNORECASE) or \
-            re.search("qwen", model_name, re.IGNORECASE)) else False)
-    except ValueError as e:
-        if "Unrecognized model in" in str(e):
-            raise ValueError("load_model: model config is not found")
+            raise ValueError(f"load_model: tokenizer is not found, {e}")
         else:
-            raise
+            raise ValueError(f"load_model: unknown EnvironmentError occurred, {e}")
+    except Exception as e:
+        logging.error(f"Exception: {e}")
+        raise ValueError(f"load_model: an unexpected error occurred, {e}")

     load_to_meta = model_on_meta(config)

@@ -478,33 +493,26 @@ def load_model(
                 trust_remote_code=True)
         elif ((
                 re.search("gpt", model_name, re.IGNORECASE)
-                or re.search("mpt", model_name, re.IGNORECASE)
-                or re.search("bloom", model_name, re.IGNORECASE)
-                or re.search("llama", model_name, re.IGNORECASE)
-                or re.search("magicoder", model_name, re.IGNORECASE)
-                or re.search("neural-chat-7b-v1", model_name, re.IGNORECASE)
-                or re.search("neural-chat-7b-v2", model_name, re.IGNORECASE)
-                or re.search("neural-chat-7b-v3", model_name, re.IGNORECASE)
-                or re.search("qwen", model_name, re.IGNORECASE)
-                or re.search("starcoder", model_name, re.IGNORECASE)
-                or re.search("codellama", model_name, re.IGNORECASE)
-                or re.search("mistral", model_name, re.IGNORECASE)
-                or re.search("codegen", model_name, re.IGNORECASE)
-            ) and not ipex_int8) or re.search("opt", model_name, re.IGNORECASE):
+                or config.model_type == "bloom"
+                or config.model_type == "qwen"
+                or config.model_type == "gpt_bigcode"
+                or config.model_type == "mpt"
+                or config.model_type == "llama"
+                or config.model_type == "mistral"
+            ) and not ipex_int8) or config.model_type == "opt":
             with smart_context_manager(use_deepspeed=use_deepspeed):
                 model = AutoModelForCausalLM.from_pretrained(
                     model_name,
                     use_auth_token=hf_access_token,
                     torch_dtype=torch_dtype,
                     low_cpu_mem_usage=True,
                     quantization_config=bitsandbytes_quant_config,
-                    trust_remote_code=True if (re.search("qwen", model_name, re.IGNORECASE) or \
+                    trust_remote_code=True if (config.model_type == "qwen" or \
                         re.search("codegen", model_name, re.IGNORECASE)) else False
                 )
         elif (
-            (re.search("starcoder", model_name, re.IGNORECASE)
-                or re.search("codellama", model_name, re.IGNORECASE)
-                or re.search("codegen", model_name, re.IGNORECASE)
+            (config.model_type == "gpt_bigcode"
+                or config.model_type == "llama"
             ) and ipex_int8
         ):
             with smart_context_manager(use_deepspeed=use_deepspeed):
@@ -520,9 +528,9 @@ def load_model(
                     model_name,
                     file_name="best_model.pt",
                 )
-        elif(
-            (re.search("llama", model_name, re.IGNORECASE)
-                or re.search("opt", model_name, re.IGNORECASE)
+        elif (
+            (config.model_type == "llama"
+                or config.model_type == "opt"
                 or re.search("gpt_neox", model_name, re.IGNORECASE)
                 or re.search("gptj", model_name, re.IGNORECASE)
                 or re.search("falcon", model_name, re.IGNORECASE)
@@ -547,10 +555,14 @@ def load_model(
            raise ValueError(f"unsupported model name or path {model_name}, \
                only supports FLAN-T5/LLAMA/MPT/GPT/BLOOM/OPT/QWEN/NEURAL-CHAT/MISTRAL/CODELLAMA/STARCODER/CODEGEN now.")
     except EnvironmentError as e:
+        logging.error(f"Exception: {e}")
         if "not a local folder and is not a valid model identifier" in str(e):
             raise ValueError("load_model: model name or path is not found")
         else:
-            raise
+            raise ValueError(f"load_model: unknown EnvironmentError occurred, {e}")
+    except Exception as e:
+        logging.error(f"Exception: {e}")
+        raise ValueError(f"load_model: an unexpected error occurred, {e}")

     if re.search("llama", model.config.architectures[0], re.IGNORECASE):
         # unwind broken decapoda-research config
@@ -1192,6 +1204,8 @@ def predict(**params):
     output = tokenizer.decode(generation_output.sequences[0], skip_special_tokens=True)
     if "### Response:" in output:
         return output.split("### Response:")[1].strip()
+    if "@@ Response" in output:
+        return output.split("@@ Response")[1].strip()
     if "### Assistant" in output:
         return output.split("### Assistant:")[1].strip()
     if "\nassistant\n" in output:

intel_extension_for_transformers/neural_chat/prompts/prompt.py

Lines changed: 10 additions & 1 deletion

@@ -220,4 +220,13 @@ def get_prompt(self) -> str:
         return res

     def clear_messages(self) -> str:
-        self.conv.messages = []
+        self.conv.messages = []
+
+# pylint: disable=C0301
+MAGICODER_PROMPT = """You are an exceptionally intelligent coding assistant that consistently delivers accurate and reliable responses to user instructions.
+
+@@ Instruction
+{instruction}
+
+@@ Response
+"""
