Skip to content
This repository was archived by the owner on Oct 25, 2024. It is now read-only.

Commit d8cc116

Browse files
Refine notebook and fix restful api issues (#445)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
1 parent ebbbff4 commit d8cc116

File tree

19 files changed

+84
-137
lines changed

19 files changed

+84
-137
lines changed

intel_extension_for_transformers/neural_chat/docs/full_notebooks.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ Welcome to use Jupyter Notebooks to explore how to build and customize chatbots
1919
| 2.4 | Deploying Chatbot on Habana Gaudi1/Gaudi2 | Learn how to deploy chatbot on Habana Gaudi1/Gaudi2 | [Notebook](./notebooks/deploy_chatbot_on_habana_gaudi.ipynb) |
2020
| 2.5 | Deploying Chatbot on Nvidia A100 | Learn how to deploy chatbot on A100 | [Notebook](./notebooks/deploy_chatbot_on_nv_a100.ipynb) |
2121
| 2.6 | Deploying Chatbot with Load Balance | Learn how to deploy chatbot with load balance | [Notebook](./notebooks/chatbot_with_load_balance.ipynb) |
22-
| 2.7 | Deploying End-to-end Chatbot on Intel CPU SPR | Learn how to deploy an end to end text chatbot on Intel CPU SPR including frontend GUI and backend | [Notebook](./notebooks/setup_text_chatbot_service_on_spr.ipynb) |
22+
| 2.7 | Deploying End-to-end text Chatbot on Intel CPU SPR | Learn how to deploy an end to end text chatbot on Intel CPU SPR including frontend GUI and backend | [Notebook](./notebooks/setup_text_chatbot_service_on_spr.ipynb) |
23+
| 2.8 | Deploying End-to-end talkingbot on Intel CPU SPR | Learn how to deploy an end to end talkingbot on Intel CPU SPR including frontend GUI and backend | [Notebook](./notebooks/setup_talking_chatbot_service_on_spr.ipynb) |
2324
| 3 | Optimizing Chatbots | | |
2425
| 3.1 | Enabling Chatbot with BF16 Optimization on SPR | Learn how to optimize chatbot using mixed precision on SPR | [Notebook](./notebooks/amp_optimization_on_spr.ipynb) |
2526
| 3.2 | Enabling Chatbot with BF16 Optimization on Habana Gaudi1/Gaudi2 | Learn how to optimize chatbot using mixed precision on Habana Gaudi1/Gaudi2 | [Notebook](./notebooks/amp_optimization_on_habana_gaudi.ipynb) |

intel_extension_for_transformers/neural_chat/docs/notebooks/deploy_chatbot_on_habana_gaudi.ipynb

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -154,27 +154,6 @@
154154
"print(\" Play Output Audio ......\")\n",
155155
"IPython.display.display(IPython.display.Audio(\"welcome.wav\"))\n"
156156
]
157-
},
158-
{
159-
"cell_type": "markdown",
160-
"metadata": {},
161-
"source": [
162-
"## Access Finetune Service"
163-
]
164-
},
165-
{
166-
"cell_type": "code",
167-
"execution_count": null,
168-
"metadata": {},
169-
"outputs": [],
170-
"source": [
171-
"from neural_chat import FinetuningClientExecutor\n",
172-
"executor = FinetuningClientExecutor()\n",
173-
"tuning_status = executor(\n",
174-
" server_ip=\"127.0.0.1\", # master server ip\n",
175-
" port=8000 # master server port (port on socket 0, if both sockets are deployed)\n",
176-
" )"
177-
]
178157
}
179158
],
180159
"metadata": {

intel_extension_for_transformers/neural_chat/docs/notebooks/deploy_chatbot_on_icx.ipynb

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -167,27 +167,6 @@
167167
"print(\" Play Output Audio ......\")\n",
168168
"IPython.display.display(IPython.display.Audio(\"welcome.wav\"))\n"
169169
]
170-
},
171-
{
172-
"cell_type": "markdown",
173-
"metadata": {},
174-
"source": [
175-
"## Access Finetune Service"
176-
]
177-
},
178-
{
179-
"cell_type": "code",
180-
"execution_count": null,
181-
"metadata": {},
182-
"outputs": [],
183-
"source": [
184-
"from neural_chat import FinetuningClientExecutor\n",
185-
"executor = FinetuningClientExecutor()\n",
186-
"tuning_status = executor(\n",
187-
" server_ip=\"127.0.0.1\", # master server ip\n",
188-
" port=8000 # master server port (port on socket 0, if both sockets are deployed)\n",
189-
" )"
190-
]
191170
}
192171
],
193172
"metadata": {

intel_extension_for_transformers/neural_chat/docs/notebooks/deploy_chatbot_on_nv_a100.ipynb

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -167,27 +167,6 @@
167167
"print(\" Play Output Audio ......\")\n",
168168
"IPython.display.display(IPython.display.Audio(\"welcome.wav\"))\n"
169169
]
170-
},
171-
{
172-
"cell_type": "markdown",
173-
"metadata": {},
174-
"source": [
175-
"## Access Finetune Service"
176-
]
177-
},
178-
{
179-
"cell_type": "code",
180-
"execution_count": null,
181-
"metadata": {},
182-
"outputs": [],
183-
"source": [
184-
"from neural_chat import FinetuningClientExecutor\n",
185-
"executor = FinetuningClientExecutor()\n",
186-
"tuning_status = executor(\n",
187-
" server_ip=\"127.0.0.1\", # master server ip\n",
188-
" port=8000 # master server port (port on socket 0, if both sockets are deployed)\n",
189-
" )"
190-
]
191170
}
192171
],
193172
"metadata": {

intel_extension_for_transformers/neural_chat/docs/notebooks/deploy_chatbot_on_spr.ipynb

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -169,27 +169,6 @@
169169
"print(\" Play Output Audio ......\")\n",
170170
"IPython.display.display(IPython.display.Audio(\"welcome.wav\"))\n"
171171
]
172-
},
173-
{
174-
"cell_type": "markdown",
175-
"metadata": {},
176-
"source": [
177-
"## Access Finetune Service"
178-
]
179-
},
180-
{
181-
"cell_type": "code",
182-
"execution_count": null,
183-
"metadata": {},
184-
"outputs": [],
185-
"source": [
186-
"from intel_extension_for_transformers.neural_chat import FinetuningClientExecutor\n",
187-
"executor = FinetuningClientExecutor()\n",
188-
"tuning_status = executor(\n",
189-
" server_ip=\"127.0.0.1\", # master server ip\n",
190-
" port=8000 # master server port (port on socket 0, if both sockets are deployed)\n",
191-
" )"
192-
]
193172
}
194173
],
195174
"metadata": {

intel_extension_for_transformers/neural_chat/docs/notebooks/deploy_chatbot_on_xpu.ipynb

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -197,27 +197,6 @@
197197
"print(\" Play Output Audio ......\")\n",
198198
"IPython.display.display(IPython.display.Audio(\"welcome.wav\"))\n"
199199
]
200-
},
201-
{
202-
"cell_type": "markdown",
203-
"metadata": {},
204-
"source": [
205-
"## Access Finetune Service"
206-
]
207-
},
208-
{
209-
"cell_type": "code",
210-
"execution_count": null,
211-
"metadata": {},
212-
"outputs": [],
213-
"source": [
214-
"from neural_chat import FinetuningClientExecutor\n",
215-
"executor = FinetuningClientExecutor()\n",
216-
"tuning_status = executor(\n",
217-
" server_ip=\"127.0.0.1\", # master server ip\n",
218-
" port=8000 # master server port (port on socket 0, if both sockets are deployed)\n",
219-
" )"
220-
]
221200
}
222201
],
223202
"metadata": {

intel_extension_for_transformers/neural_chat/models/base_model.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
from abc import ABC
1919
from typing import List
20-
import os
20+
import os, types
2121
from fastchat.conversation import get_conv_template, Conversation
2222
from ..config import GenerationConfig
2323
from ..plugins import is_plugin_enabled, get_plugin_instance, get_registered_plugins, plugins
@@ -135,6 +135,11 @@ def predict_stream(self, query, config=None):
135135
if not os.path.exists(query):
136136
raise ValueError(f"The audio file path {query} is invalid.")
137137

138+
query_include_prompt = False
139+
self.get_conv_template(self.model_name, config.task)
140+
if self.conv_template.roles[0] in query and self.conv_template.roles[1] in query:
141+
query_include_prompt = True
142+
138143
# plugin pre actions
139144
for plugin_name in get_registered_plugins():
140145
if is_plugin_enabled(plugin_name):
@@ -150,18 +155,25 @@ def predict_stream(self, query, config=None):
150155
if plugin_name == "safety_checker" and response:
151156
return "Your query contains sensitive words, please try another query."
152157
else:
153-
query = response
158+
if response != None and response != False:
159+
query = response
154160
assert query is not None, "Query cannot be None."
155161

156-
query = self.prepare_prompt(query, self.model_name, config.task)
162+
if not query_include_prompt:
163+
query = self.prepare_prompt(query, self.model_name, config.task)
157164
response = predict_stream(**construct_parameters(query, self.model_name, self.device, config))
158165

166+
def is_generator(obj):
167+
return isinstance(obj, types.GeneratorType)
168+
159169
# plugin post actions
160170
for plugin_name in get_registered_plugins():
161171
if is_plugin_enabled(plugin_name):
162172
plugin_instance = get_plugin_instance(plugin_name)
163173
if plugin_instance:
164174
if hasattr(plugin_instance, 'post_llm_inference_actions'):
175+
if plugin_name == "safety_checker" and is_generator(response):
176+
continue
165177
response = plugin_instance.post_llm_inference_actions(response)
166178

167179
# clear plugins config
@@ -195,6 +207,11 @@ def predict(self, query, config=None):
195207
if not os.path.exists(query):
196208
raise ValueError(f"The audio file path {query} is invalid.")
197209

210+
query_include_prompt = False
211+
self.get_conv_template(self.model_name, config.task)
212+
if self.conv_template.roles[0] in query and self.conv_template.roles[1] in query:
213+
query_include_prompt = True
214+
198215
# plugin pre actions
199216
for plugin_name in get_registered_plugins():
200217
if is_plugin_enabled(plugin_name):
@@ -214,8 +231,9 @@ def predict(self, query, config=None):
214231
query = response
215232
assert query is not None, "Query cannot be None."
216233

234+
if not query_include_prompt:
235+
query = self.prepare_prompt(query, self.model_name, config.task)
217236
# LLM inference
218-
query = self.prepare_prompt(query, self.model_name, config.task)
219237
response = predict(**construct_parameters(query, self.model_name, self.device, config))
220238

221239
# plugin post actions

intel_extension_for_transformers/neural_chat/models/neuralchat_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,6 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
5252
if "neural-chat-7b-v2" in model_path.lower():
5353
return get_conv_template("neural-chat-7b-v2")
5454
else:
55-
return get_conv_template("neural-chat-7b-v1.1")
55+
return get_conv_template("neural-chat-7b-v1-1")
5656

5757
register_model_adapter(NeuralChatModel)

intel_extension_for_transformers/neural_chat/pipeline/plugins/caching/cache.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@
1717

1818
# pylint: disable=wrong-import-position
1919
from typing import Any
20-
20+
import os
2121
import gptcache.processor.post
2222
import gptcache.processor.pre
23-
from gptcache import Cache, Config
23+
from gptcache import Cache, cache, Config
2424
from gptcache.adapter.adapter import adapt
2525
from gptcache.embedding import (
2626
Onnx,
@@ -55,8 +55,11 @@
5555
import time
5656

5757
class ChatCache:
58-
def __init__(self):
59-
self.cache_obj = Cache()
58+
def __init__(self, config_dir: str=os.path.join(
59+
os.path.dirname(os.path.abspath(__file__)), "./cache_config.yaml"),
60+
embedding_model_dir: str="hkunlp/instructor-large"):
61+
self.cache_obj = cache
62+
self.init_similar_cache_from_config(config_dir, embedding_model_dir)
6063

6164
def _cache_data_converter(self, cache_data):
6265
return self._construct_resp_from_cache(cache_data)
@@ -130,8 +133,7 @@ def init_similar_cache(self, data_dir: str = "api_cache", pre_func=get_prompt,
130133
config=config,
131134
)
132135

133-
def init_similar_cache_from_config(self, config_dir: str="./config/cache_config.yml",
134-
embedding_model_dir: str="hkunlp/instructor-large"):
136+
def init_similar_cache_from_config(self, config_dir, embedding_model_dir):
135137
import_ruamel()
136138
from ruamel.yaml import YAML # pylint: disable=C0415
137139

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
gptcache
2+
git+https://github.com/UKPLab/sentence-transformers.git

0 commit comments

Comments
 (0)