Skip to content
This repository was archived by the owner on Oct 25, 2024. It is now read-only.

Commit d8cc116

Browse files
Refine notebook and fix restful api issues (#445)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
1 parent ebbbff4 commit d8cc116

File tree

19 files changed

+84
-137
lines changed

19 files changed

+84
-137
lines changed

intel_extension_for_transformers/neural_chat/docs/full_notebooks.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ Welcome to use Jupyter Notebooks to explore how to build and customize chatbots
1919
| 2.4 | Deploying Chatbot on Habana Gaudi1/Gaudi2 | Learn how to deploy chatbot on Habana Gaudi1/Gaudi2 | [Notebook](./notebooks/deploy_chatbot_on_habana_gaudi.ipynb) |
2020
| 2.5 | Deploying Chatbot on Nvidia A100 | Learn how to deploy chatbot on A100 | [Notebook](./notebooks/deploy_chatbot_on_nv_a100.ipynb) |
2121
| 2.6 | Deploying Chatbot with Load Balance | Learn how to deploy chatbot with load balance | [Notebook](./notebooks/chatbot_with_load_balance.ipynb) |
22-
| 2.7 | Deploying End-to-end Chatbot on Intel CPU SPR | Learn how to deploy an end to end text chatbot on Intel CPU SPR including frontend GUI and backend | [Notebook](./notebooks/setup_text_chatbot_service_on_spr.ipynb) |
22+
| 2.7 | Deploying End-to-end text Chatbot on Intel CPU SPR | Learn how to deploy an end to end text chatbot on Intel CPU SPR including frontend GUI and backend | [Notebook](./notebooks/setup_text_chatbot_service_on_spr.ipynb) |
23+
| 2.8 | Deploying End-to-end talkingbot on Intel CPU SPR | Learn how to deploy an end to end talkingbot on Intel CPU SPR including frontend GUI and backend | [Notebook](./notebooks/setup_talking_chatbot_service_on_spr.ipynb) |
2324
| 3 | Optimizing Chatbots | | |
2425
| 3.1 | Enabling Chatbot with BF16 Optimization on SPR | Learn how to optimize chatbot using mixed precision on SPR | [Notebook](./notebooks/amp_optimization_on_spr.ipynb) |
2526
| 3.2 | Enabling Chatbot with BF16 Optimization on Habana Gaudi1/Gaudi2 | Learn how to optimize chatbot using mixed precision on Habana Gaudi1/Gaudi2 | [Notebook](./notebooks/amp_optimization_on_habana_gaudi.ipynb) |

intel_extension_for_transformers/neural_chat/docs/notebooks/deploy_chatbot_on_habana_gaudi.ipynb

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -154,27 +154,6 @@
154154
"print(\" Play Output Audio ......\")\n",
155155
"IPython.display.display(IPython.display.Audio(\"welcome.wav\"))\n"
156156
]
157-
},
158-
{
159-
"cell_type": "markdown",
160-
"metadata": {},
161-
"source": [
162-
"## Access Finetune Service"
163-
]
164-
},
165-
{
166-
"cell_type": "code",
167-
"execution_count": null,
168-
"metadata": {},
169-
"outputs": [],
170-
"source": [
171-
"from neural_chat import FinetuningClientExecutor\n",
172-
"executor = FinetuningClientExecutor()\n",
173-
"tuning_status = executor(\n",
174-
" server_ip=\"127.0.0.1\", # master server ip\n",
175-
" port=8000 # master server port (port on socket 0, if both sockets are deployed)\n",
176-
" )"
177-
]
178157
}
179158
],
180159
"metadata": {

intel_extension_for_transformers/neural_chat/docs/notebooks/deploy_chatbot_on_icx.ipynb

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -167,27 +167,6 @@
167167
"print(\" Play Output Audio ......\")\n",
168168
"IPython.display.display(IPython.display.Audio(\"welcome.wav\"))\n"
169169
]
170-
},
171-
{
172-
"cell_type": "markdown",
173-
"metadata": {},
174-
"source": [
175-
"## Access Finetune Service"
176-
]
177-
},
178-
{
179-
"cell_type": "code",
180-
"execution_count": null,
181-
"metadata": {},
182-
"outputs": [],
183-
"source": [
184-
"from neural_chat import FinetuningClientExecutor\n",
185-
"executor = FinetuningClientExecutor()\n",
186-
"tuning_status = executor(\n",
187-
" server_ip=\"127.0.0.1\", # master server ip\n",
188-
" port=8000 # master server port (port on socket 0, if both sockets are deployed)\n",
189-
" )"
190-
]
191170
}
192171
],
193172
"metadata": {

intel_extension_for_transformers/neural_chat/docs/notebooks/deploy_chatbot_on_nv_a100.ipynb

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -167,27 +167,6 @@
167167
"print(\" Play Output Audio ......\")\n",
168168
"IPython.display.display(IPython.display.Audio(\"welcome.wav\"))\n"
169169
]
170-
},
171-
{
172-
"cell_type": "markdown",
173-
"metadata": {},
174-
"source": [
175-
"## Access Finetune Service"
176-
]
177-
},
178-
{
179-
"cell_type": "code",
180-
"execution_count": null,
181-
"metadata": {},
182-
"outputs": [],
183-
"source": [
184-
"from neural_chat import FinetuningClientExecutor\n",
185-
"executor = FinetuningClientExecutor()\n",
186-
"tuning_status = executor(\n",
187-
" server_ip=\"127.0.0.1\", # master server ip\n",
188-
" port=8000 # master server port (port on socket 0, if both sockets are deployed)\n",
189-
" )"
190-
]
191170
}
192171
],
193172
"metadata": {

intel_extension_for_transformers/neural_chat/docs/notebooks/deploy_chatbot_on_spr.ipynb

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -169,27 +169,6 @@
169169
"print(\" Play Output Audio ......\")\n",
170170
"IPython.display.display(IPython.display.Audio(\"welcome.wav\"))\n"
171171
]
172-
},
173-
{
174-
"cell_type": "markdown",
175-
"metadata": {},
176-
"source": [
177-
"## Access Finetune Service"
178-
]
179-
},
180-
{
181-
"cell_type": "code",
182-
"execution_count": null,
183-
"metadata": {},
184-
"outputs": [],
185-
"source": [
186-
"from intel_extension_for_transformers.neural_chat import FinetuningClientExecutor\n",
187-
"executor = FinetuningClientExecutor()\n",
188-
"tuning_status = executor(\n",
189-
" server_ip=\"127.0.0.1\", # master server ip\n",
190-
" port=8000 # master server port (port on socket 0, if both sockets are deployed)\n",
191-
" )"
192-
]
193172
}
194173
],
195174
"metadata": {

intel_extension_for_transformers/neural_chat/docs/notebooks/deploy_chatbot_on_xpu.ipynb

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -197,27 +197,6 @@
197197
"print(\" Play Output Audio ......\")\n",
198198
"IPython.display.display(IPython.display.Audio(\"welcome.wav\"))\n"
199199
]
200-
},
201-
{
202-
"cell_type": "markdown",
203-
"metadata": {},
204-
"source": [
205-
"## Access Finetune Service"
206-
]
207-
},
208-
{
209-
"cell_type": "code",
210-
"execution_count": null,
211-
"metadata": {},
212-
"outputs": [],
213-
"source": [
214-
"from neural_chat import FinetuningClientExecutor\n",
215-
"executor = FinetuningClientExecutor()\n",
216-
"tuning_status = executor(\n",
217-
" server_ip=\"127.0.0.1\", # master server ip\n",
218-
" port=8000 # master server port (port on socket 0, if both sockets are deployed)\n",
219-
" )"
220-
]
221200
}
222201
],
223202
"metadata": {

intel_extension_for_transformers/neural_chat/models/base_model.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
from abc import ABC
1919
from typing import List
20-
import os
20+
import os, types
2121
from fastchat.conversation import get_conv_template, Conversation
2222
from ..config import GenerationConfig
2323
from ..plugins import is_plugin_enabled, get_plugin_instance, get_registered_plugins, plugins
@@ -135,6 +135,11 @@ def predict_stream(self, query, config=None):
135135
if not os.path.exists(query):
136136
raise ValueError(f"The audio file path {query} is invalid.")
137137

138+
query_include_prompt = False
139+
self.get_conv_template(self.model_name, config.task)
140+
if self.conv_template.roles[0] in query and self.conv_template.roles[1] in query:
141+
query_include_prompt = True
142+
138143
# plugin pre actions
139144
for plugin_name in get_registered_plugins():
140145
if is_plugin_enabled(plugin_name):
@@ -150,18 +155,25 @@ def predict_stream(self, query, config=None):
150155
if plugin_name == "safety_checker" and response:
151156
return "Your query contains sensitive words, please try another query."
152157
else:
153-
query = response
158+
if response != None and response != False:
159+
query = response
154160
assert query is not None, "Query cannot be None."
155161

156-
query = self.prepare_prompt(query, self.model_name, config.task)
162+
if not query_include_prompt:
163+
query = self.prepare_prompt(query, self.model_name, config.task)
157164
response = predict_stream(**construct_parameters(query, self.model_name, self.device, config))
158165

166+
def is_generator(obj):
167+
return isinstance(obj, types.GeneratorType)
168+
159169
# plugin post actions
160170
for plugin_name in get_registered_plugins():
161171
if is_plugin_enabled(plugin_name):
162172
plugin_instance = get_plugin_instance(plugin_name)
163173
if plugin_instance:
164174
if hasattr(plugin_instance, 'post_llm_inference_actions'):
175+
if plugin_name == "safety_checker" and is_generator(response):
176+
continue
165177
response = plugin_instance.post_llm_inference_actions(response)
166178

167179
# clear plugins config
@@ -195,6 +207,11 @@ def predict(self, query, config=None):
195207
if not os.path.exists(query):
196208
raise ValueError(f"The audio file path {query} is invalid.")
197209

210+
query_include_prompt = False
211+
self.get_conv_template(self.model_name, config.task)
212+
if self.conv_template.roles[0] in query and self.conv_template.roles[1] in query:
213+
query_include_prompt = True
214+
198215
# plugin pre actions
199216
for plugin_name in get_registered_plugins():
200217
if is_plugin_enabled(plugin_name):
@@ -214,8 +231,9 @@ def predict(self, query, config=None):
214231
query = response
215232
assert query is not None, "Query cannot be None."
216233

234+
if not query_include_prompt:
235+
query = self.prepare_prompt(query, self.model_name, config.task)
217236
# LLM inference
218-
query = self.prepare_prompt(query, self.model_name, config.task)
219237
response = predict(**construct_parameters(query, self.model_name, self.device, config))
220238

221239
# plugin post actions

intel_extension_for_transformers/neural_chat/models/neuralchat_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,6 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
5252
if "neural-chat-7b-v2" in model_path.lower():
5353
return get_conv_template("neural-chat-7b-v2")
5454
else:
55-
return get_conv_template("neural-chat-7b-v1.1")
55+
return get_conv_template("neural-chat-7b-v1-1")
5656

5757
register_model_adapter(NeuralChatModel)

intel_extension_for_transformers/neural_chat/pipeline/plugins/caching/cache.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@
1717

1818
# pylint: disable=wrong-import-position
1919
from typing import Any
20-
20+
import os
2121
import gptcache.processor.post
2222
import gptcache.processor.pre
23-
from gptcache import Cache, Config
23+
from gptcache import Cache, cache, Config
2424
from gptcache.adapter.adapter import adapt
2525
from gptcache.embedding import (
2626
Onnx,
@@ -55,8 +55,11 @@
5555
import time
5656

5757
class ChatCache:
58-
def __init__(self):
59-
self.cache_obj = Cache()
58+
def __init__(self, config_dir: str=os.path.join(
59+
os.path.dirname(os.path.abspath(__file__)), "./cache_config.yaml"),
60+
embedding_model_dir: str="hkunlp/instructor-large"):
61+
self.cache_obj = cache
62+
self.init_similar_cache_from_config(config_dir, embedding_model_dir)
6063

6164
def _cache_data_converter(self, cache_data):
6265
return self._construct_resp_from_cache(cache_data)
@@ -130,8 +133,7 @@ def init_similar_cache(self, data_dir: str = "api_cache", pre_func=get_prompt,
130133
config=config,
131134
)
132135

133-
def init_similar_cache_from_config(self, config_dir: str="./config/cache_config.yml",
134-
embedding_model_dir: str="hkunlp/instructor-large"):
136+
def init_similar_cache_from_config(self, config_dir, embedding_model_dir):
135137
import_ruamel()
136138
from ruamel.yaml import YAML # pylint: disable=C0415
137139

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
gptcache
2+
git+https://github.com/UKPLab/sentence-transformers.git

0 commit comments

Comments
 (0)