Commit ad4becd
CI add inference test for mosaicml-mpt-7b-chat (#157)
Signed-off-by: jiafu zhang <jiafu.zhang@intel.com>
Parent: e95fc32

File tree: 4 files changed (+58, -10 lines)

.github/workflows/chatbot-inference-llama-2-7b-chat-hf.yml
Lines changed: 7 additions & 5 deletions

The concurrency group gains a per-workflow suffix (-inf-lla-7b) so that this job and the new MPT job, both invoked via workflow_call from the same caller, stop sharing one group and canceling each other. Proxy settings and the HF token are now loaded from a runner-local .env file into $GITHUB_ENV instead of relying on runner-level environment variables.

@@ -3,9 +3,8 @@ name: Chatbot inference on llama-2-7b-chat-hf
 on:
   workflow_call:
 
-# If there is a new commit, the previous jobs will be canceled
 concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-inf-lla-7b
   cancel-in-progress: true
 
 jobs:
@@ -16,18 +15,21 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v2
 
+      - name: Load environment variables
+        run: cat ~/itrex-actions-runner/.env >> $GITHUB_ENV
+
       - name: Build Docker Image
-        run: docker build ./ --target cpu --build-arg http_proxy="$HTTP_PROXY_IMAGE_BUILD" --build-arg https_proxy="$HTTPS_PROXY_IMAGE_BUILD" -f workflows/chatbot/inference/docker/Dockerfile -t chatbotinfer:latest && yes | docker container prune && yes | docker image prune
+        run: docker build ./ --target cpu --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -f workflows/chatbot/inference/docker/Dockerfile -t chatbotinfer:latest && yes | docker container prune && yes | docker image prune
 
       - name: Start Docker Container
         run: |
           cid=$(docker ps -q --filter "name=chatbotinfer")
           if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-          docker run -tid -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub -v .:/root/chatbot -e http_proxy="$HTTP_PROXY_CONTAINER_RUN" -e https_proxy="$HTTPS_PROXY_CONTAINER_RUN" --name="chatbotinfer" --hostname="chatbotinfer-container" chatbotinfer:latest
+          docker run -tid -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub -v .:/root/chatbot -e http_proxy="${{ env.HTTP_PROXY_CONTAINER_RUN }}" -e https_proxy="${{ env.HTTPS_PROXY_CONTAINER_RUN }}" --name="chatbotinfer" --hostname="chatbotinfer-container" chatbotinfer:latest
 
       - name: Run Inference Test
         run: |
-          docker exec "chatbotinfer" bash -c "cd /root/chatbot && source activate && conda activate chatbot-demo; python workflows/chatbot/inference/generate.py --base_model_path \"meta-llama/Llama-2-7b-chat-hf\" --hf_access_token \"$HF_ACCESS_TOKEN\" --instructions \"Transform the following sentence into one that shows contrast. The tree is rotten.\" "
+          docker exec "chatbotinfer" bash -c "cd /root/chatbot && source activate && conda activate chatbot-demo; python workflows/chatbot/inference/generate.py --base_model_path \"meta-llama/Llama-2-7b-chat-hf\" --hf_access_token \"${{ env.HF_ACCESS_TOKEN }}\" --instructions \"Transform the following sentence into one that shows contrast. The tree is rotten.\" "
 
       - name: Stop Container
         if: success() || failure()
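
The new "Load environment variables" step works because GitHub Actions re-reads the file named by $GITHUB_ENV after each step: every KEY=value line appended there becomes an environment variable for subsequent steps, readable as ${{ env.KEY }}. Below is a minimal sketch (not part of the commit) of that contract, using the variable names this workflow references; the runner's actual .env contents are not shown anywhere in the diff, so the values are placeholders.

```python
# Sketch: parse a KEY=value .env file like the one the workflow appends to
# $GITHUB_ENV. Variable names mirror the workflow; values are unknown here.
import os

def parse_env_file(path):
    """Parse simple KEY=value lines, skipping blanks and comments."""
    env = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, _, value = line.partition("=")
            env[key.strip()] = value.strip()
    return env

if __name__ == "__main__":
    env = parse_env_file(os.path.expanduser("~/itrex-actions-runner/.env"))
    for key in ("HTTP_PROXY_IMAGE_BUILD", "HTTPS_PROXY_IMAGE_BUILD",
                "HTTP_PROXY_CONTAINER_RUN", "HTTPS_PROXY_CONTAINER_RUN",
                "HF_ACCESS_TOKEN"):
        print(key, "=", env.get(key, "<missing>"))
```

One caveat worth noting: values written to $GITHUB_ENV are ordinary environment variables, not repository secrets, so the HF token loaded this way is not automatically masked in logs the way a secret would be.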
.github/workflows/chatbot-inference-mpt-7b-chat.yml (new file)
Lines changed: 41 additions & 0 deletions

The new workflow mirrors the llama-2 job but targets mosaicml/mpt-7b-chat; unlike that job, its inference command passes no --hf_access_token.

@@ -0,0 +1,41 @@
+name: Chatbot inference on mosaicml/mpt-7b-chat
+
+on:
+  workflow_call:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-inf-mpt-7b
+  cancel-in-progress: true
+
+jobs:
+  inference:
+    name: inference test
+    runs-on: lms-lab
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v2
+
+      - name: Load environment variables
+        run: cat ~/itrex-actions-runner/.env >> $GITHUB_ENV
+
+      - name: Build Docker Image
+        run: docker build ./ --target cpu --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -f workflows/chatbot/inference/docker/Dockerfile -t chatbotinfer:latest && yes | docker container prune && yes | docker image prune
+
+      - name: Start Docker Container
+        run: |
+          cid=$(docker ps -q --filter "name=chatbotinfer")
+          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
+          docker run -tid -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub -v .:/root/chatbot -e http_proxy="${{ env.HTTP_PROXY_CONTAINER_RUN }}" -e https_proxy="${{ env.HTTPS_PROXY_CONTAINER_RUN }}" --name="chatbotinfer" --hostname="chatbotinfer-container" chatbotinfer:latest
+
+      - name: Run Inference Test
+        run: |
+          docker exec "chatbotinfer" bash -c "cd /root/chatbot && source activate && conda activate chatbot-demo; python workflows/chatbot/inference/generate.py --base_model_path \"mosaicml/mpt-7b-chat\" --instructions \"Transform the following sentence into one that shows contrast. The tree is rotten.\" "
+
+      - name: Stop Container
+        if: success() || failure()
+        run: |
+          cid=$(docker ps -q --filter "name=chatbotinfer")
+          if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
+
+      - name: Test Summary
+        run: echo "Inference completed successfully"
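
For reference, a rough standalone approximation (not code from this repo) of the load path this job exercises: per the generate.py hunks below, the MPT branch loads with torch.bfloat16 and low_cpu_mem_usage=True, and mosaicml/mpt-7b-chat additionally requires trust_remote_code=True because it ships custom modeling code. The prompt matches the CI instruction; the generation parameters are illustrative.

```python
# Approximate, simplified version of what the CI inference test runs;
# generate.py's real MPT branch adds TorchScript tracing and DeepSpeed paths.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "mosaicml/mpt-7b-chat"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,   # dtype used by generate.py's MPT branch
    low_cpu_mem_usage=True,
    trust_remote_code=True,       # mpt-7b-chat ships custom model code
)

prompt = ("Transform the following sentence into one that shows contrast. "
          "The tree is rotten.")
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```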

.github/workflows/chatbot-test.yml
Lines changed: 5 additions & 0 deletions

@@ -8,6 +8,7 @@ on:
       - './requirements.txt'
       - '.github/workflows/chatbot-test.yml'
       - '.github/workflows/chatbot-inference-llama-2-7b-chat-hf.yml'
+      - '.github/workflows/chatbot-inference-mpt-7b-chat.yml'
       - 'intel_extension_for_transformers/**'
       - 'workflows/chatbot/inference/**'
       - 'workflows/dlsa/**'
@@ -25,3 +26,7 @@
   call-inference-llama-2-7b-chat-hf:
     uses: ./.github/workflows/chatbot-inference-llama-2-7b-chat-hf.yml
 
+  call-inference-mpt-7b-chat:
+    uses: ./.github/workflows/chatbot-inference-mpt-7b-chat.yml
+
+
workflows/chatbot/inference/generate.py
Lines changed: 5 additions & 5 deletions

@@ -370,12 +370,12 @@ def load_model(
        tokenizer_name,
        use_fast=False if (re.search("llama", model_name, re.IGNORECASE)
            or re.search("neural-chat-7b-v2", model_name, re.IGNORECASE)) else True,
-        token=hf_access_token,
+        use_auth_token=hf_access_token,
    )
    if re.search("flan-t5", model_name, re.IGNORECASE):
        with smart_context_manager(use_deepspeed=use_deepspeed):
            model = AutoModelForSeq2SeqLM.from_pretrained(
-                model_name, low_cpu_mem_usage=True, token=hf_access_token
+                model_name, low_cpu_mem_usage=True, use_auth_token=hf_access_token
            )
    elif (re.search("mpt", model_name, re.IGNORECASE)
            or re.search("neural-chat-7b-v1", model_name, re.IGNORECASE)):
@@ -388,7 +388,7 @@ def load_model(
                torch_dtype=torch.bfloat16,
                low_cpu_mem_usage=True,
                torchscript=cpu_jit,
-                token=hf_access_token,
+                use_auth_token=hf_access_token,
            )
    elif (
        re.search("gpt", model_name, re.IGNORECASE)
@@ -399,7 +399,7 @@ def load_model(
    ):
        with smart_context_manager(use_deepspeed=use_deepspeed):
            model = AutoModelForCausalLM.from_pretrained(
-                model_name, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, token=hf_access_token
+                model_name, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, use_auth_token=hf_access_token
            )
    else:
        raise ValueError(
@@ -477,7 +477,7 @@ def load_model(
        from models.mpt.mpt_trace import jit_trace_mpt_7b, MPTTSModelForCausalLM

        model = jit_trace_mpt_7b(model)
-        config = AutoConfig.from_pretrained(model_name, trust_remote_code=True, token=hf_access_token)
+        config = AutoConfig.from_pretrained(model_name, trust_remote_code=True, use_auth_token=hf_access_token)
        model = MPTTSModelForCausalLM(
            model, config, use_cache=use_cache, model_dtype=torch.bfloat16
        )
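
The generate.py side of the commit swaps every token=hf_access_token back to use_auth_token=hf_access_token. use_auth_token is the older keyword; a plausible motivation is that the transformers release installed in the CI image predates the newer token kwarg, though the commit message does not say so. If code must span both API generations, a small helper can choose the keyword at runtime. A sketch under that assumption (the 4.32.0 cutoff is approximate; verify it against the pinned version):

```python
# Illustrative only: the commit itself simply standardizes on use_auth_token.
# Newer transformers releases renamed the auth kwarg use_auth_token -> token;
# the exact version cutoff below is an assumption, not taken from this repo.
import transformers
from packaging import version
from transformers import AutoTokenizer

def auth_kwargs(hf_access_token):
    """Return the auth kwarg the installed transformers release understands."""
    if version.parse(transformers.__version__) >= version.parse("4.32.0"):
        return {"token": hf_access_token}
    return {"use_auth_token": hf_access_token}

tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-2-7b-chat-hf",
    **auth_kwargs("hf_xxx"),  # placeholder, not a real credential
)
```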
