Skip to content
This repository was archived by the owner on Oct 25, 2024. It is now read-only.
4 changes: 4 additions & 0 deletions intel_extension_for_transformers/llm/inference/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,10 @@ def generate_output():
input_tokens = prepare_inputs(
input_tokens, model.device if hasattr(model, 'device') else torch.device(device)
)
else:
input_tokens = tokenizer.batch_encode_plus(
[prompt], return_tensors="pt", padding=True
)
with context:
generation_kwargs = dict(
streamer=streamer,
Expand Down
5 changes: 1 addition & 4 deletions intel_extension_for_transformers/neural_chat/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from .config import PipelineConfig
from .config import BaseFinetuningConfig
from .config import DeviceOptions
from .plugins import plugins, global_plugins
from .plugins import plugins

def build_chatbot(config: PipelineConfig=None):
"""Build the chatbot with a given configuration.
Expand Down Expand Up @@ -101,9 +101,6 @@ def build_chatbot(config: PipelineConfig=None):
plugins[plugin_name]["instance"] = plugins[plugin_name]['class'](**plugin_value['args'])
adapter.register_plugin_instance(plugin_name, plugins[plugin_name]["instance"])

global_plugins.reset_plugins()
plugins = global_plugins.plugins

parameters = {}
parameters["model_name"] = config.model_name_or_path
if config.tokenizer_name_or_path:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ RUN source activate && conda activate chatbot-finetuning && pip install oneccl_b
git clone https://github.com/huggingface/peft.git && cd peft && python setup.py install && \
cd /intel-extension-for-transformers && pip install -v . && \
cd ./intel_extension_for_transformers/neural_chat/examples/instruction_tuning && pip install -r requirements.txt && \
cd /intel-extension-for-transformers/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat && pip install -r requirements.txt
cd /intel-extension-for-transformers/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat && pip install -r requirements_cpu.txt

# Enable passwordless ssh for mpirun
RUN mkdir /var/run/sshd
Expand Down Expand Up @@ -120,7 +120,7 @@ RUN git clone --single-branch --branch=${ITREX_VER} ${REPO} intel-extension-for-
cd /intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/examples/instruction_tuning/ && \
pip install -r requirements.txt && \
cd /intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/ && \
pip install -r requirements.txt
pip install -r requirements_hpu.txt

# Build ITREX
RUN cd /intel-extension-for-transformers && pip install -v . && \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ RUN source activate && conda activate chatbot-demo && \
pip install schema && \
pip install datasets torch transformers sentencepiece peft evaluate nltk rouge_score && \
cd /root/chatbot && git clone https://github.com/intel/intel-extension-for-transformers.git \
&& cd ./intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/ && pip install -r requirements.txt
&& cd ./intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/ && pip install -r requirements_cpu.txt

ENV KMP_BLOCKTIME=1
ENV KMP_SETTINGS=1
Expand Down Expand Up @@ -111,7 +111,7 @@ RUN git clone https://github.com/huggingface/optimum-habana.git && \

RUN git clone https://github.com/intel/intel-extension-for-transformers.git \
&& cd ./intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/ \
&& pip install -r requirements.txt \
&& pip install -r requirements_hpu.txt \
&& pip install transformers==4.32.0 \
&& pip install accelerate==0.22.0 \
&& pip uninstall -y intel_extension_for_pytorch
Expand Down
13 changes: 11 additions & 2 deletions intel_extension_for_transformers/neural_chat/models/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
from typing import List
import os
from fastchat.conversation import get_conv_template, Conversation
from intel_extension_for_transformers.llm.inference import load_model, predict, predict_stream, MODELS
from intel_extension_for_transformers.llm.inference import load_model, predict, predict_stream
from ..config import GenerationConfig
from ..plugins import is_plugin_enabled, get_plugin_instance, get_registered_plugins, get_plugin_arguments
from ..plugins import is_plugin_enabled, get_plugin_instance, get_registered_plugins, plugins
from ..utils.common import is_audio_file


Expand Down Expand Up @@ -204,6 +204,15 @@ def predict(self, query, config=None):
if hasattr(plugin_instance, 'post_llm_inference_actions'):
response = plugin_instance.post_llm_inference_actions(response)

# clear plugins config
for key in plugins:
plugins[key] = {
"enable": False,
"class": None,
"args": {},
"instance": None
}

return response

def chat_stream(self, query, config=None):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,4 @@ rouge_score
openpyxl
numpy==1.23.5
tiktoken==0.4.0
lm_eval
38 changes: 38 additions & 0 deletions intel_extension_for_transformers/neural_chat/requirements_cpu.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
transformers>=4.32.0
peft
fschat
torch
torchaudio
intel_extension_for_pytorch
num2words
speechbrain
paddlepaddle
paddlespeech==1.4.1
shortuuid
gptcache
evaluate
pydub
python-multipart
PyPDF2
langchain
python-docx
scikit-learn
farm-haystack
librosa
beautifulsoup4
InstructorEmbedding
chromadb
fastapi
pydantic
starlette
yacs
uvicorn
optimum
sentence_transformers
unstructured
markdown
rouge_score
openpyxl
numpy==1.23.5
tiktoken==0.4.0
lm_eval
35 changes: 35 additions & 0 deletions intel_extension_for_transformers/neural_chat/requirements_hpu.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
transformers>=4.32.0
peft
fschat
num2words
speechbrain
paddlepaddle
paddlespeech==1.4.1
shortuuid
gptcache
evaluate
pydub
python-multipart
PyPDF2
langchain
python-docx
scikit-learn
farm-haystack
librosa
beautifulsoup4
InstructorEmbedding
chromadb
fastapi
pydantic
starlette
yacs
uvicorn
optimum
sentence_transformers
unstructured
markdown
rouge_score
openpyxl
numpy==1.23.5
tiktoken==0.4.0
lm_eval
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def test_build_chatbot_with_audio_plugin(self):

self.assertIsNotNone(response)
print("output audio path: ", response)
self.assertTrue(os.path.exists(plugins.tts.args["output_audio_path"]))
self.assertTrue(os.path.exists("./output_audio.wav"))

def test_build_chatbot_with_safety_checker_plugin(self):
plugins.safety_checker.enable = True
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,5 +64,14 @@ def test_quantization(self):
config = AMPConfig()
optimize_model(model="facebook/opt-125m", config=config)

def test_text_chat_stream(self):
    """Smoke-test streaming inference: build a small chatbot and consume its
    token stream, checking that the accumulated output is not None.

    Uses facebook/opt-125m to keep the model download/load cheap.
    NOTE(review): assertIsNotNone on a str accumulator can never fail once the
    loop completes — presumably this test mainly guards against exceptions
    raised inside predict_stream.
    """
    pipeline_cfg = PipelineConfig(model_name_or_path="facebook/opt-125m")
    chatbot = build_chatbot(pipeline_cfg)
    chunks = []
    for chunk in chatbot.predict_stream("Tell me about Intel Xeon Scalable Processors."):
        chunks.append(chunk)
        print(chunk)
    stream_text = "".join(chunks)
    self.assertIsNotNone(stream_text)

if __name__ == '__main__':
unittest.main()
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,4 @@ rouge_score
openpyxl
numpy==1.23.5
tiktoken==0.4.0
lm_eval