pytorch-tpu
diff --git a/tests/quantization/autoawq/test_awq.py b/tests/quantization/autoawq/test_awq.py
Lines changed: 7 additions & 6 deletions
@@ -101,7 +101,11 @@ class AwqTest(unittest.TestCase):
 
  EXPECTED_OUTPUT = "Hello my name is Katie and I am a 20 year old student at the University of North Carolina at Chapel Hill. I am a junior and I am majoring in Journalism and minoring in Spanish"
  EXPECTED_OUTPUT_BF16 = "Hello my name is Katie and I am a 20 year old student at the University of North Carolina at Chapel Hill. I am a junior and I am majoring in Exercise and Sport Science with a"
- EXPECTED_OUTPUT_EXLLAMA = "Hello my name is Katie and I am a 20 year old student from the UK. I am currently studying for a degree in English Literature and History at the University of York. I am a very out"
+
+ EXPECTED_OUTPUT_EXLLAMA = [
+ "Hello my name is Katie and I am a 20 year old student from the UK. I am currently studying for a degree in English Literature and History at the University of York. I am a very out",
+ "Hello my name is Katie and I am a 20 year old student from the UK. I am currently studying for a degree in English Literature and History at the University of York. I am a very creative",
+ ]
  device_map = "cuda"
 
  # called only once for all test in this class
@@ -111,10 +115,7 @@ def setUpClass(cls):
  Setup quantized model
  """
  cls.tokenizer = AutoTokenizer.from_pretrained(cls.model_name)
- cls.quantized_model = AutoModelForCausalLM.from_pretrained(
- cls.model_name,
- device_map=cls.device_map,
- )
+ cls.quantized_model = AutoModelForCausalLM.from_pretrained(cls.model_name, device_map=cls.device_map)
 
  def tearDown(self):
  gc.collect()
@@ -204,7 +205,7 @@ def test_quantized_model_exllama(self):
  )
 
  output = quantized_model.generate(**input_ids, max_new_tokens=40)
- self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT_EXLLAMA)
+ self.assertIn(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT_EXLLAMA)
 
  def test_quantized_model_no_device_map(self):
  """