5 changes: 4 additions & 1 deletion intel_extension_for_transformers/neural_chat/chatbot.py
@@ -64,6 +64,9 @@ def build_chatbot(config: PipelineConfig=None):
elif "chatglm" in config.model_name_or_path:
from .models.chatglm_model import ChatGlmModel
adapter = ChatGlmModel()
elif "Qwen" in config.model_name_or_path:
from .models.qwen_model import QwenModel
adapter = QwenModel()
elif "opt" in config.model_name_or_path or \
"gpt" in config.model_name_or_path or \
"flan-t5" in config.model_name_or_path or \
@@ -72,7 +75,7 @@
adapter = BaseModel()
else:
raise ValueError("NeuralChat Error: Unsupported model name or path, \
only supports FLAN-T5/LLAMA/MPT/GPT/BLOOM/OPT/NEURAL-CHAT now.")
only supports FLAN-T5/LLAMA/MPT/GPT/BLOOM/OPT/QWEN/NEURAL-CHAT now.")

# register plugin instance in model adaptor
if config.plugins:
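The new branch means a Qwen checkpoint is selected purely by matching "Qwen" in the configured path. A minimal usage sketch of the resulting flow (the model id and the predict() call are assumptions drawn from the NeuralChat README, not part of this diff):

from intel_extension_for_transformers.neural_chat import PipelineConfig, build_chatbot

# "Qwen" in the path is what routes build_chatbot to the new QwenModel adapter.
config = PipelineConfig(model_name_or_path="Qwen/Qwen-7B-Chat")  # illustrative model id
chatbot = build_chatbot(config)
print(chatbot.predict("Tell me about Intel Xeon Scalable Processors."))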
@@ -47,7 +47,7 @@
"source": [
"!git clone https://github.com/intel/intel-extension-for-transformers.git\n",
"!cd ./intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/\n",
"!pip install -r requirements.txt"
"!pip install -r requirements_cpu.txt"
]
},
{
@@ -52,7 +52,7 @@
"source": [
"!git clone https://github.com/intel/intel-extension-for-transformers.git\n",
"!cd ./intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/\n",
"!pip install -r requirements.txt"
"!pip install -r requirements_xpu.txt"
]
},
{
@@ -139,7 +139,7 @@
"metadata": {},
"outputs": [],
"source": [
"from neural_chat import TextChatClientExecutor\n",
"from intel_extension_for_transformers.neural_chat import TextChatClientExecutor\n",
"executor = TextChatClientExecutor()\n",
"result = executor(\n",
" prompt=\"Tell me about Intel Xeon Scalable Processors.\",\n",
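With the corrected package-qualified import, the client cell can be completed as below; server_ip, port, and result.text are assumptions following the client examples elsewhere in the repo, and a NeuralChat server must already be running:

from intel_extension_for_transformers.neural_chat import TextChatClientExecutor

executor = TextChatClientExecutor()
result = executor(
    prompt="Tell me about Intel Xeon Scalable Processors.",
    server_ip="127.0.0.1",  # placeholder: address of the running NeuralChat server
    port=8000,              # placeholder: port the server was started on
)
print(result.text)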
@@ -330,6 +330,7 @@ def load_model(
use_fast=False if (re.search("llama", model_name, re.IGNORECASE)
or re.search("neural-chat-7b-v2", model_name, re.IGNORECASE)) else True,
use_auth_token=hf_access_token,
trust_remote_code=True if (re.search("qwen", model_name, re.IGNORECASE)) else False,
)
config = AutoConfig.from_pretrained(model_name, use_auth_token=hf_access_token)
load_to_meta = model_on_meta(config)
@@ -356,6 +357,7 @@ def load_model(
or re.search("opt", model_name, re.IGNORECASE)
or re.search("neural-chat-7b-v1", model_name, re.IGNORECASE)
or re.search("neural-chat-7b-v2", model_name, re.IGNORECASE)
or re.search("qwen", model_name, re.IGNORECASE)
):
with smart_context_manager(use_deepspeed=use_deepspeed):
model = AutoModelForCausalLM.from_pretrained(
@@ -367,7 +369,7 @@
)
else:
raise ValueError(
f"Unsupported model {model_name}, only supports FLAN-T5/LLAMA/MPT/GPT/BLOOM/OPT/NEURAL-CHAT now."
f"Unsupported model {model_name}, only supports FLAN-T5/LLAMA/MPT/GPT/BLOOM/OPT/QWEN/NEURAL-CHAT now."
)

if re.search("llama", model.config.architectures[0], re.IGNORECASE):
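Reduced to plain transformers calls, the qwen additions in load_model amount to the sketch below. The model id is illustrative, and passing trust_remote_code on the model load is an assumption (only the tokenizer change is visible in this hunk), since Qwen checkpoints ship custom modeling code as well:

import re
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen-7B-Chat"  # illustrative checkpoint id
# Qwen repos carry custom tokenizer/modeling code, hence trust_remote_code.
trust_remote = bool(re.search("qwen", model_name, re.IGNORECASE))
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=trust_remote)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=trust_remote)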
54 changes: 54 additions & 0 deletions intel_extension_for_transformers/neural_chat/models/qwen_model.py
@@ -0,0 +1,54 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .base_model import BaseModel, register_model_adapter
import logging
from fastchat.conversation import get_conv_template, Conversation

logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

class QwenModel(BaseModel):
    def match(self, model_path: str):
        """
        Check if the provided model_path matches the current model.

        Args:
            model_path (str): Path to a model.

        Returns:
            bool: True if the model_path matches, False otherwise.
        """
        return "qwen" in model_path.lower()

    def get_default_conv_template(self, model_path: str) -> Conversation:
        """
        Get the default conversation template for the given model path.

        Args:
            model_path (str): Path to the model.

        Returns:
            Conversation: A default conversation template.
        """
        return get_conv_template("qwen-7b-chat")

register_model_adapter(QwenModel)
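A sketch of how the registered adapter is exercised, assuming the installed fschat version ships a "qwen-7b-chat" conversation template:

from intel_extension_for_transformers.neural_chat.models.qwen_model import QwenModel

adapter = QwenModel()
assert adapter.match("Qwen/Qwen-7B-Chat")       # case-insensitive substring check
conv = adapter.get_default_conv_template("Qwen/Qwen-7B-Chat")
conv.append_message(conv.roles[0], "Hello!")    # user turn
conv.append_message(conv.roles[1], None)        # placeholder for the model's reply
print(conv.get_prompt())                        # prompt rendered in Qwen's chat format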
@@ -1,7 +1,7 @@
transformers>=4.32.0
peft
fschat
intel_extension_for_pytorch
intel_extension_for_pytorch==2.0.100
num2words
speechbrain
paddlepaddle
@@ -35,6 +35,7 @@ openpyxl
numpy==1.23.5
tiktoken==0.4.0
lm_eval
transformers_stream_generator==0.0.4
--extra-index-url https://download.pytorch.org/whl/cpu
torch==2.0.1
torchaudio==2.0.2
@@ -28,4 +28,5 @@ rouge_score
openpyxl
numpy==1.23.5
tiktoken==0.4.0
transformers_stream_generator==0.0.4
cchardet