Skip to content

Commit 3a4882a

Browse files
committed
fix Windows StreamResponse Bug; provide examples to connect to the vision model
1 parent b568ce1 commit 3a4882a

22 files changed

+843
-585
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@ test_phi3*
77
**/__pycache__
88
**.egg-info
99

10-
scripts/*.ps1
10+
scripts/*.ps1
11+
scripts/*.sh

requirements-cuda.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
onnxruntime-gpu~=1.18.0
2+
onnxruntime-genai-cuda~=0.2.0

scripts/images/catdog.png

199 KB
Loading

scripts/python/get_model.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
1-
import httpx
21
import asyncio
32

3+
import httpx
4+
5+
46
async def fetch_models():
    """Query the local server's model listing endpoint.

    Sends a GET request to ``http://localhost:6979/v1/models`` and returns
    the response body parsed as JSON.
    """
    endpoint = "http://localhost:6979/v1/models"
    async with httpx.AsyncClient() as session:
        reply = await session.get(endpoint)
        return reply.json()
911

12+
1013
async def main():
    """Fetch the model listing and print it to stdout."""
    print(await fetch_models())
1316

17+
1418
if __name__ == "__main__":
15-
asyncio.run(main())
19+
asyncio.run(main())

scripts/python/httpx_client.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import httpx
22

3+
34
def chat_completion(url: str, payload: dict):
45
with httpx.Client() as client:
56
response = client.post(url, json=payload)
@@ -9,6 +10,7 @@ def chat_completion(url: str, payload: dict):
910
print(f"Error: {response.status_code}")
1011
print(response.text)
1112

13+
1214
# Example usage
1315
if __name__ == "__main__":
1416
url = "http://localhost:6979/v1/chat/completions"
@@ -17,6 +19,6 @@ def chat_completion(url: str, payload: dict):
1719
"model": "phi3-mini-int4",
1820
"max_tokens": 80,
1921
"temperature": 0.0,
20-
"stream": False # Set stream to False
22+
"stream": False, # Set stream to False
2123
}
22-
chat_completion(url, payload)
24+
chat_completion(url, payload)
Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,21 @@
1-
import httpx
21
import asyncio
3-
import time
2+
3+
import httpx
4+
45

56
async def stream_chat_completion(url: str, payload: dict):
    """POST ``payload`` as JSON to ``url`` and print the streamed response.

    Args:
        url: Endpoint that receives the POST request.
        payload: Request body; it is sent JSON-encoded.

    On a 200 response every non-empty decoded chunk is printed as it
    arrives. On any other status the status code and the full response
    body are printed instead.
    """
    async with httpx.AsyncClient() as client:
        async with client.stream("POST", url, json=payload) as response:
            if response.status_code == 200:
                # aiter_text() decodes incrementally, so a multi-byte
                # character split across two network chunks no longer
                # raises UnicodeDecodeError the way per-chunk
                # bytes.decode() did.
                async for text in response.aiter_text():
                    if text:
                        print(text)
            else:
                print(f"Error: {response.status_code}")
                # The body of a streamed response is not loaded
                # automatically, and ``text`` is a property rather than a
                # coroutine: read the body first, then access it.
                await response.aread()
                print(response.text)
1617

18+
1719
# Example usage
1820
if __name__ == "__main__":
1921
url = "http://localhost:6979/v1/chat/completions"
@@ -22,6 +24,6 @@ async def stream_chat_completion(url: str, payload: dict):
2224
"model": "phi3-mini-int4",
2325
"max_tokens": 80,
2426
"temperature": 0.0,
25-
"stream": True
27+
"stream": True,
2628
}
27-
asyncio.run(stream_chat_completion(url, payload))
29+
asyncio.run(stream_chat_completion(url, payload))

scripts/python/httpx_client_vision.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import httpx
2-
import json
3-
from embeddedllm.protocol import ChatCompletionRequest, ChatCompletionMessageParam, CustomChatCompletionMessageParam
2+
3+
from embeddedllm.protocol import (
4+
CustomChatCompletionMessageParam,
5+
)
6+
47

58
def chat_completion(url: str, payload: dict):
69
with httpx.Client(timeout=None) as client:
@@ -11,20 +14,24 @@ def chat_completion(url: str, payload: dict):
1114
print(f"Error: {response.status_code}")
1215
print(response.text)
1316

17+
1418
# Example usage
1519
if __name__ == "__main__":
16-
IMAGE_PATH="C:\\Users\\ryzz\\VDrive\\RyzenAI\\icons8-amd-ryzen-64.png"
20+
import os
1721
import base64
1822
import mimetypes
1923

24+
current_file_path = os.path.abspath(__file__)
25+
IMAGE_PATH = os.path.join(os.path.dirname(current_file_path), "..", "images", "catdog.png")
26+
2027
# Function to encode the image and infer its MIME type
2128
def encode_image(image_path):
    """Return ``(mime_type, base64_string)`` for the file at ``image_path``.

    The MIME type is guessed from the file name. The file is read in binary
    mode and its contents are base64-encoded into a UTF-8 string.

    Raises:
        ValueError: If no MIME type can be guessed from ``image_path``.
    """
    guessed_type = mimetypes.guess_type(image_path)[0]
    if guessed_type is None:
        raise ValueError("Could not infer the MIME type of the image.")

    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()

    return guessed_type, base64.b64encode(raw_bytes).decode("utf-8")
3037

@@ -50,15 +57,15 @@ def encode_image(image_path):
5057
],
5158
}
5259
]
53-
60+
5461
payload = {
5562
"messages": messages,
5663
"model": "phi3-mini-int4",
5764
"max_tokens": 80,
5865
"temperature": 0.0,
59-
"stream": False # Set stream to False
66+
"stream": False, # Set stream to False
6067
}
6168
# print(data)
6269
# print(messages)
6370
# print(data.messages[0].content[0])
64-
chat_completion(url, payload)
71+
chat_completion(url, payload)

scripts/python/httpx_client_vision_stream.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
1-
import httpx
21
import asyncio
3-
import time
2+
3+
import httpx
4+
45

56
async def stream_chat_completion(url: str, payload: dict):
67
async with httpx.AsyncClient(timeout=None) as client:
78
async with client.stream("POST", url, json=payload) as response:
89
if response.status_code == 200:
910
async for data in response.aiter_bytes():
1011
if data:
11-
print(data.decode('utf-8'))
12+
print(data.decode("utf-8"))
1213
# time.sleep(0.1)
1314
else:
1415
print(f"Error: {response.status_code}")
@@ -17,8 +18,10 @@ async def stream_chat_completion(url: str, payload: dict):
1718

1819
# Example usage
1920
if __name__ == "__main__":
20-
# IMAGE_PATH="C:\\Users\\ryzz\\VDrive\\RyzenAI\\icons8-amd-ryzen-64.png"
21-
IMAGE_PATH="C:\\Users\\ryzz\\VDrive\\sampleimage.png"
21+
import os
22+
23+
current_file_path = os.path.abspath(__file__)
24+
IMAGE_PATH = os.path.join(os.path.dirname(current_file_path), "..", "images", "catdog.png")
2225
import base64
2326
import mimetypes
2427

@@ -29,7 +32,7 @@ def encode_image(image_path):
2932
raise ValueError("Could not infer the MIME type of the image.")
3033

3134
with open(image_path, "rb") as image_file:
32-
base64_image = base64.b64encode(image_file.read()).decode('utf-8')
35+
base64_image = base64.b64encode(image_file.read()).decode("utf-8")
3336

3437
return mime_type, base64_image
3538

@@ -55,12 +58,12 @@ def encode_image(image_path):
5558
],
5659
}
5760
]
58-
61+
5962
payload = {
6063
"messages": messages,
6164
"model": "phi3-mini-int4",
6265
"max_tokens": 80,
6366
"temperature": 0.0,
64-
"stream": True
67+
"stream": True,
6568
}
66-
asyncio.run(stream_chat_completion(url, payload))
69+
asyncio.run(stream_chat_completion(url, payload))

scripts/python/litellm_client.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
1-
import litellm
1+
import litellm
22

33
# Single-turn chat request streamed from the local OpenAI-compatible server.
messages = [{"role": "user", "content": "Hey, how's it going?"}]

completion_kwargs = {
    "model": "phi3-mini-int4",  # model identifier sent to the server
    "messages": messages,
    "api_base": "http://localhost:6979/v1",
    "api_key": "EMPTY",
    "temperature": 0,
    "max_tokens": 80,
    "stream": True,
    "custom_llm_provider": "openai",
}
response = litellm.completion(**completion_kwargs)

# Print each streamed delta; a delta without content prints an empty line.
for part in response:
    delta_text = part.choices[0].delta.content
    print(delta_text or "")
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import litellm
2+
import base64
3+
import mimetypes
4+
5+
import os
6+
7+
# Build the sample image path relative to this script's own location
# rather than the current working directory.
current_file_path = os.path.abspath(__file__)
script_dir = os.path.dirname(current_file_path)
IMAGE_PATH = os.path.join(script_dir, "..", "images", "catdog.png")
9+
10+
11+
# Function to encode the image and infer its MIME type
12+
def encode_image(image_path):
    """Read an image file and return its MIME type and base64 payload.

    Returns:
        A ``(mime_type, base64_image)`` tuple: the MIME type guessed from
        the file name, and the file's bytes base64-encoded as a UTF-8 string.

    Raises:
        ValueError: If the MIME type cannot be guessed from ``image_path``.
    """
    kind = mimetypes.guess_type(image_path)[0]
    if kind is None:
        raise ValueError("Could not infer the MIME type of the image.")

    with open(image_path, "rb") as stream:
        encoded = base64.b64encode(stream.read())

    return kind, encoded.decode("utf-8")
21+
22+
23+
# Getting the base64 string and MIME type
24+
# Encode the sample image and wrap it in a data URL for the request.
mime_type, base64_image = encode_image(IMAGE_PATH)

url = "http://localhost:6979/v1/chat/completions"
string_url = f"data:{mime_type};base64,{base64_image}"

# One user turn made of a text question followed by the image.
text_part = {"type": "text", "text": "What is in this image?"}
image_part = {"type": "image_url", "image_url": {"url": string_url}}
messages = [{"role": "user", "content": [text_part, image_part]}]

completion_kwargs = {
    "model": "phi3-mini-int4",  # model identifier sent to the server
    "messages": messages,
    "api_base": "http://localhost:6979/v1",
    "api_key": "EMPTY",
    "temperature": 0,
    "max_tokens": 80,
    "stream": True,
    "custom_llm_provider": "openai",
}
response = litellm.completion(**completion_kwargs)

# Print each streamed delta; a delta without content prints an empty line.
for part in response:
    delta_text = part.choices[0].delta.content
    print(delta_text or "")

0 commit comments

Comments
 (0)