EmbeddedLLM
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 1 deletion b/‎.gitignore‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎requirements-cuda.txt‎
Lines changed: 2 additions & 0 deletions b/‎requirements-cuda.txt‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎scripts/images/catdog.png‎
199 KB b/‎scripts/images/catdog.png‎
199 KB
diff --git a/‎scripts/python/get_model.py‎
Lines changed: 6 additions & 2 deletions b/‎scripts/python/get_model.py‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎scripts/python/httpx_client.py‎
Lines changed: 4 additions & 2 deletions b/‎scripts/python/httpx_client.py‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎scripts/python/httpx_client_stream.py‎
Lines changed: 7 additions & 5 deletions b/‎scripts/python/httpx_client_stream.py‎
Lines changed: 7 additions & 5 deletions
diff --git a/‎scripts/python/httpx_client_vision.py‎
Lines changed: 14 additions & 7 deletions b/‎scripts/python/httpx_client_vision.py‎
Lines changed: 14 additions & 7 deletions
diff --git a/‎scripts/python/httpx_client_vision_stream.py‎
Lines changed: 12 additions & 9 deletions b/‎scripts/python/httpx_client_vision_stream.py‎
Lines changed: 12 additions & 9 deletions
diff --git a/‎scripts/python/litellm_client.py‎
Lines changed: 11 additions & 9 deletions b/‎scripts/python/litellm_client.py‎
Lines changed: 11 additions & 9 deletions
diff --git a/‎scripts/python/litellm_vision_client.py‎
Lines changed: 59 additions & 0 deletions b/‎scripts/python/litellm_vision_client.py‎
Lines changed: 59 additions & 0 deletions
@@ -7,4 +7,5 @@ test_phi3*
 **/__pycache__
 **.egg-info
 
-scripts/*.ps1
+scripts/*.ps1
+scripts/*.sh
@@ -0,0 +1,2 @@
+onnxruntime-gpu~=1.18.0
+onnxruntime-genai-cuda~=0.2.0
@@ -1,15 +1,19 @@
-import httpx
 import asyncio
 
+import httpx
+
+
 async def fetch_models():
  url = "http://localhost:6979/v1/models"
  async with httpx.AsyncClient() as client:
  response = await client.get(url)
  return response.json()
 
+
 async def main():
  models = await fetch_models()
  print(models)
 
+
 if __name__ == "__main__":
- asyncio.run(main())
+ asyncio.run(main())
@@ -1,5 +1,6 @@
 import httpx
 
+
 def chat_completion(url: str, payload: dict):
  with httpx.Client() as client:
  response = client.post(url, json=payload)
@@ -9,6 +10,7 @@ def chat_completion(url: str, payload: dict):
  print(f"Error: {response.status_code}")
  print(response.text)
 
+
 # Example usage
 if __name__ == "__main__":
  url = "http://localhost:6979/v1/chat/completions"
@@ -17,6 +19,6 @@ def chat_completion(url: str, payload: dict):
  "model": "phi3-mini-int4",
  "max_tokens": 80,
  "temperature": 0.0,
- "stream": False # Set stream to False
+ "stream": False, # Set stream to False
  }
- chat_completion(url, payload)
+ chat_completion(url, payload)
@@ -1,19 +1,21 @@
-import httpx
 import asyncio
-import time
+
+import httpx
+
 
 async def stream_chat_completion(url: str, payload: dict):
  async with httpx.AsyncClient() as client:
  async with client.stream("POST", url, json=payload) as response:
  if response.status_code == 200:
  async for data in response.aiter_bytes():
  if data:
- print(data.decode('utf-8'))
+ print(data.decode("utf-8"))
  # time.sleep(1)
  else:
  print(f"Error: {response.status_code}")
  print(await response.text())
 
+
 # Example usage
 if __name__ == "__main__":
  url = "http://localhost:6979/v1/chat/completions"
@@ -22,6 +24,6 @@ async def stream_chat_completion(url: str, payload: dict):
  "model": "phi3-mini-int4",
  "max_tokens": 80,
  "temperature": 0.0,
- "stream": True
+ "stream": True,
  }
- asyncio.run(stream_chat_completion(url, payload))
+ asyncio.run(stream_chat_completion(url, payload))
@@ -1,6 +1,9 @@
 import httpx
-import json
-from embeddedllm.protocol import ChatCompletionRequest, ChatCompletionMessageParam, CustomChatCompletionMessageParam
+
+from embeddedllm.protocol import (
+ CustomChatCompletionMessageParam,
+)
+
 
 def chat_completion(url: str, payload: dict):
  with httpx.Client(timeout=None) as client:
@@ -11,20 +14,24 @@ def chat_completion(url: str, payload: dict):
  print(f"Error: {response.status_code}")
  print(response.text)
 
+
 # Example usage
 if __name__ == "__main__":
- IMAGE_PATH="C:\\Users\\ryzz\\VDrive\\RyzenAI\\icons8-amd-ryzen-64.png"
+ import os
  import base64
  import mimetypes
 
+ current_file_path = os.path.abspath(__file__)
+ IMAGE_PATH = os.path.join(os.path.dirname(current_file_path), "..", "images", "catdog.png")
+
  # Function to encode the image and infer its MIME type
  def encode_image(image_path):
  mime_type, _ = mimetypes.guess_type(image_path)
  if mime_type is None:
  raise ValueError("Could not infer the MIME type of the image.")
 
  with open(image_path, "rb") as image_file:
- base64_image = base64.b64encode(image_file.read()).decode('utf-8')
+ base64_image = base64.b64encode(image_file.read()).decode("utf-8")
 
  return mime_type, base64_image
 
@@ -50,15 +57,15 @@ def encode_image(image_path):
  ],
  }
  ]
- 
+
  payload = {
  "messages": messages,
  "model": "phi3-mini-int4",
  "max_tokens": 80,
  "temperature": 0.0,
- "stream": False # Set stream to False
+ "stream": False, # Set stream to False
  }
  # print(data)
  # print(messages)
  # print(data.messages[0].content[0])
- chat_completion(url, payload)
+ chat_completion(url, payload)
@@ -1,14 +1,15 @@
-import httpx
 import asyncio
-import time
+
+import httpx
+
 
 async def stream_chat_completion(url: str, payload: dict):
  async with httpx.AsyncClient(timeout=None) as client:
  async with client.stream("POST", url, json=payload) as response:
  if response.status_code == 200:
  async for data in response.aiter_bytes():
  if data:
- print(data.decode('utf-8'))
+ print(data.decode("utf-8"))
  # time.sleep(0.1)
  else:
  print(f"Error: {response.status_code}")
@@ -17,8 +18,10 @@ async def stream_chat_completion(url: str, payload: dict):
 
 # Example usage
 if __name__ == "__main__":
- # IMAGE_PATH="C:\\Users\\ryzz\\VDrive\\RyzenAI\\icons8-amd-ryzen-64.png"
- IMAGE_PATH="C:\\Users\\ryzz\\VDrive\\sampleimage.png"
+ import os
+
+ current_file_path = os.path.abspath(__file__)
+ IMAGE_PATH = os.path.join(os.path.dirname(current_file_path), "..", "images", "catdog.png")
  import base64
  import mimetypes
 
@@ -29,7 +32,7 @@ def encode_image(image_path):
  raise ValueError("Could not infer the MIME type of the image.")
 
  with open(image_path, "rb") as image_file:
- base64_image = base64.b64encode(image_file.read()).decode('utf-8')
+ base64_image = base64.b64encode(image_file.read()).decode("utf-8")
 
  return mime_type, base64_image
 
@@ -55,12 +58,12 @@ def encode_image(image_path):
  ],
  }
  ]
- 
+
  payload = {
  "messages": messages,
  "model": "phi3-mini-int4",
  "max_tokens": 80,
  "temperature": 0.0,
- "stream": True 
+ "stream": True,
  }
- asyncio.run(stream_chat_completion(url, payload))
+ asyncio.run(stream_chat_completion(url, payload))
@@ -1,15 +1,17 @@
-import litellm 
+import litellm
 
 messages = [{"role": "user", "content": "Hey, how's it going?"}]
 
 response = litellm.completion(
- model="phi3-mini-int4", # pass the vllm model name
- messages=messages,
- api_base="http://localhost:6979/v1",
- api_key="EMPTY",
- temperature=0,
- max_tokens=80, stream=True,
- custom_llm_provider="openai")
+ model="phi3-mini-int4", # pass the vllm model name
+ messages=messages,
+ api_base="http://localhost:6979/v1",
+ api_key="EMPTY",
+ temperature=0,
+ max_tokens=80,
+ stream=True,
+ custom_llm_provider="openai",
+)
 
 for part in response:
- print(part.choices[0].delta.content or "")
+ print(part.choices[0].delta.content or "")
@@ -0,0 +1,59 @@
+import litellm
+import base64
+import mimetypes
+
+import os
+
+current_file_path = os.path.abspath(__file__)
+IMAGE_PATH = os.path.join(os.path.dirname(current_file_path), "..", "images", "catdog.png")
+
+
+# Function to encode the image and infer its MIME type
+def encode_image(image_path):
+ mime_type, _ = mimetypes.guess_type(image_path)
+ if mime_type is None:
+ raise ValueError("Could not infer the MIME type of the image.")
+
+ with open(image_path, "rb") as image_file:
+ base64_image = base64.b64encode(image_file.read()).decode("utf-8")
+
+ return mime_type, base64_image
+
+
+# Getting the base64 string and MIME type
+mime_type, base64_image = encode_image(IMAGE_PATH)
+
+url = "http://localhost:6979/v1/chat/completions"
+# print(f"data:{mime_type};base64,{base64_image}")
+string_url = f"data:{mime_type};base64,{base64_image}"
+# data = ChatCompletionMessageParam(**payload["messages"])
+
+messages = [
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": "What is in this image?"},
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": string_url,
+ },
+ },
+ ],
+ }
+]
+# messages = [{"role": "user", "content": "Hey, how's it going?"}]
+
+response = litellm.completion(
+ model="phi3-mini-int4", # pass the vllm model name
+ messages=messages,
+ api_base="http://localhost:6979/v1",
+ api_key="EMPTY",
+ temperature=0,
+ max_tokens=80,
+ stream=True,
+ custom_llm_provider="openai",
+)
+
+for part in response:
+ print(part.choices[0].delta.content or "")
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+onnxruntime-gpu~=1.18.0`
	`2`	`+onnxruntime-genai-cuda~=0.2.0`