This repository was archived by the owner on Oct 25, 2024. It is now read-only.

Commit 1383c76

[LLM Runtime] Add GGUF API UT (#1160)
1 parent: ea58cd5

File tree

1 file changed: +16 −0 lines changed


intel_extension_for_transformers/llm/runtime/graph/tests/test_llm_runtime.py

Lines changed: 16 additions & 0 deletions
```diff
@@ -76,6 +76,22 @@ def test_llm_runtime(self):
             print(config_type, cmpData(pt_logits.detach().numpy().flatten(), itrex_logits.flatten()))
 
 
+    def test_gguf_api(self):
+        model_name = "TheBloke/Mistral-7B-v0.1-GGUF"
+        model_file = "mistral-7b-v0.1.Q4_0.gguf"
+        tokenizer_name = "/tf_dataset2/models/pytorch/Mistral-7B-v0.1"
+
+        prompt = "Once upon a time"
+        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, trust_remote_code=True)
+        inputs = tokenizer(prompt, return_tensors="pt").input_ids
+        streamer = TextStreamer(tokenizer)
+
+        model = AutoModelForCausalLM.from_pretrained(model_name, model_file = model_file)
+        output = model.generate(inputs, streamer=streamer, max_new_tokens=10)
+        print("output = ", output)
+        assert(output == [[1, 5713, 3714, 264, 727, 28725, 736, 403, 264, 1628, 2746, 693, 6045, 298, 1220, 28723, 985]])
+
+
     def test_beam_search(self):
         model_name = "/tf_dataset2/models/pytorch/gpt-j-6B"  # or local path to model
         prompts = [
```
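For readers who want to try the same GGUF path outside the unittest harness, below is a minimal sketch of what the new test exercises. The imports are assumptions (they sit above the diff hunk and are not shown here): `AutoModelForCausalLM` is taken from `intel_extension_for_transformers.transformers`, while `AutoTokenizer` and `TextStreamer` come from Hugging Face `transformers`. The CI-local tokenizer path is swapped for the public `mistralai/Mistral-7B-v0.1` repo, which is also an assumption; point it at whatever copy of the Mistral tokenizer you have.

```python
# Minimal sketch of the GGUF loading path covered by test_gguf_api.
# Import paths and the tokenizer repo below are assumptions, not part of the diff.
from transformers import AutoTokenizer, TextStreamer
from intel_extension_for_transformers.transformers import AutoModelForCausalLM  # assumed import path

model_name = "TheBloke/Mistral-7B-v0.1-GGUF"   # Hub repo hosting the GGUF files
model_file = "mistral-7b-v0.1.Q4_0.gguf"       # specific quantized .gguf file inside that repo
tokenizer_name = "mistralai/Mistral-7B-v0.1"   # assumption: public tokenizer instead of the CI-local path

prompt = "Once upon a time"
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, trust_remote_code=True)
inputs = tokenizer(prompt, return_tensors="pt").input_ids
streamer = TextStreamer(tokenizer)             # prints decoded tokens as they are generated

# model_file selects which .gguf weights to load from the Hub repo.
model = AutoModelForCausalLM.from_pretrained(model_name, model_file=model_file)
output = model.generate(inputs, streamer=streamer, max_new_tokens=10)
print("output =", output)
```

The test itself pins the generated token IDs to a fixed expected list, which only holds if generation is deterministic for this configuration; the sketch above simply prints the output instead.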
