
Commit 7f0090e

[NeuralChat] Support Gaudi model parallelism serving (#802)

1 parent: fd74a9a
18 files changed: +534 −15 lines

intel_extension_for_transformers/neural_chat/config.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -402,6 +402,7 @@ class LoadingModelConfig:
     use_hpu_graphs: bool = False
     use_cache: bool = True
     use_deepspeed: bool = False
+    world_size: int = 1
     ipex_int8: bool = False
     use_llm_runtime: bool = False
```
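The new `world_size` field (default 1, i.e. single-card serving) sets the number of devices used for model-parallel serving when DeepSpeed is enabled. A minimal sketch of how it might be set, assuming the remaining `LoadingModelConfig` fields keep their defaults:

```python
from intel_extension_for_transformers.neural_chat.config import LoadingModelConfig

# Request DeepSpeed inference sharded across 8 Gaudi cards;
# world_size defaults to 1 (no model parallelism).
loading_config = LoadingModelConfig(use_deepspeed=True, world_size=8)
```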

Lines changed: 47 additions & 0 deletions (new file)

This README is intended to guide you through setting up the backend for a text chatbot using the NeuralChat framework. You can deploy this text chatbot on various platforms, including Intel XEON Scalable Processors, Habana's Gaudi processors (HPU), Intel Data Center and Client GPUs, and Nvidia Data Center and Client GPUs.

This example shows how to deploy the chatbot backend on Habana's Gaudi processors (HPU).

# Setup Conda

First, you need to install and configure the Conda environment:

```shell
# Download and install Miniconda
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda*.sh
source ~/.bashrc
# Create and activate a dedicated environment (the name is illustrative)
conda create -n neuralchat python=3.9
conda activate neuralchat
```

# Install Python dependencies

Install the dependencies using pip:

>**Note**: Please make sure the transformers version is 4.34.1.

```bash
pip install -r ../../../../../requirements_hpu.txt
pip install transformers==4.34.1
```
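Optionally, you can confirm that the pinned transformers version is the one actually active in your environment:

```python
# Verify the transformers version required above is installed.
import transformers

assert transformers.__version__ == "4.34.1", transformers.__version__
```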

# Configure the textbot.yaml

You can customize the configuration file `textbot.yaml` to match your environment setup. Here's a table to help you understand the configurable options:

| Item               | Value                           |
| ------------------ | ------------------------------- |
| host               | 127.0.0.1                       |
| port               | 8000                            |
| model_name_or_path | "meta-llama/Llama-2-7b-chat-hf" |
| device             | "hpu"                           |
| use_deepspeed      | true                            |
| world_size         | 8                               |
| tasks_list         | ['textchat']                    |
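Putting the values from the table together, the resulting `textbot.yaml` might look like the sketch below; every value comes from the table above, so adjust them to your own environment:

```yaml
host: 127.0.0.1
port: 8000

model_name_or_path: "meta-llama/Llama-2-7b-chat-hf"
device: "hpu"
use_deepspeed: true
world_size: 8

# task choices = ['textchat', 'voicechat', 'retrieval', 'text2image', 'finetune']
tasks_list: ['textchat']
```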
# Run the TextChat server

To start the TextChat server, use the following command:

```shell
nohup python run_text_chat.py &
```
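Once the server is up, you can send it a quick request to confirm the textchat task is live. The sketch below is a hypothetical smoke test: the endpoint path and payload shape are assumptions based on NeuralChat's OpenAI-style textchat API, so check them against the version you installed:

```python
import requests

# Hypothetical smoke test for the server started above; the
# /v1/chat/completions path and request body are assumptions.
response = requests.post(
    "http://127.0.0.1:8000/v1/chat/completions",
    json={
        "model": "meta-llama/Llama-2-7b-chat-hf",
        "messages": [{"role": "user", "content": "Tell me about Intel Gaudi."}],
    },
)
print(response.status_code, response.json())
```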
Lines changed: 32 additions & 0 deletions (new file)

```yaml
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This is the parameter configuration file for NeuralChat Serving.

#################################################################################
#                               SERVER SETTING                                  #
#################################################################################
host: 0.0.0.0
port: 8000

model_name_or_path: "Phind/Phind-CodeLlama-34B-v2"
device: "hpu"
use_deepspeed: true
world_size: 8

# task choices = ['textchat', 'voicechat', 'retrieval', 'text2image', 'finetune']
tasks_list: ['textchat']
```

intel_extension_for_transformers/neural_chat/examples/deployment/textbot/backend/README.md renamed to intel_extension_for_transformers/neural_chat/examples/deployment/textbot/backend/xeon/README.md

Lines changed: 1 addition & 0 deletions

```diff
@@ -1,5 +1,6 @@
 This README is intended to guide you through setting up the backend for a text chatbot using the NeuralChat framework. You can deploy this text chatbot on various platforms, including Intel XEON Scalable Processors, Habana's Gaudi processors (HPU), Intel Data Center GPU and Client GPU, Nvidia Data Center GPU and Client GPU.
 
+This example shows how to deploy the chatbot backend on Intel XEON Scalable Processors.
 
 # Setup Conda
 
```
Lines changed: 26 additions & 0 deletions (new file)

```python
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from intel_extension_for_transformers.neural_chat import NeuralChatServerExecutor


def main():
    # Build the serving executor and launch it with the settings from
    # textbot.yaml; server output is written to textbot.log.
    server_executor = NeuralChatServerExecutor()
    server_executor(config_file="./textbot.yaml", log_file="./textbot.log")


if __name__ == "__main__":
    main()
```
