This repository was archived by the owner on Oct 25, 2024. It is now read-only.

Commit 1380d5e

Support PEFT model (#153)
Signed-off-by: Mengni Wang <mengni.wang@intel.com>
Co-authored-by: Haihao Shen <haihao.shen@intel.com>
1 parent 10af3ca commit 1380d5e

File tree

3 files changed (+14 lines, -2 lines)


examples/huggingface/pytorch/language-modeling/quantization/README.md

Lines changed: 8 additions & 2 deletions
@@ -30,13 +30,15 @@ Here is how to run the scripts:
 ```bash
 # "--sq" is used to enable smooth quant
 # "--int8_bf16_mixed" is used to enable int8-bf16 mixed mode for platform that natively supports bf16
+# "--peft_model_id" is used to load PEFT weights from peft_model_id
 python run_clm_no_trainer.py \
     --model EleutherAI/gpt-j-6B \
     --quantize \
     --sq \
     --alpha 1.0 \
     --output_dir "saved_results" \
     --ipex \
+    --peft_model_id "peft_model_id"
 ```

 ```bash
@@ -70,14 +72,16 @@ python run_clm_no_trainer.py \
 ```bash
 # "--sq" is used to enable smooth quant
 # "--int8_bf16_mixed" is used to enable int8-bf16 mixed mode for platform that natively supports bf16
+# "--peft_model_id" is used to load PEFT weights from peft_model_id
 python run_clm_no_trainer.py \
     --model facebook/opt-2.7b \
     --quantize \
     --sq \
     --alpha 0.5 \
     --ipex \
     --output_dir "saved_results" \
-    --int8_bf16_mixed
+    --int8_bf16_mixed \
+    --peft_model_id "peft_model_id"
 ```

 #### Accuracy with lm_eval
@@ -99,14 +103,16 @@ python run_clm_no_trainer.py \
 ```bash
 # "--sq" is used to enable smooth quant
 # "--int8_bf16_mixed" is used to enable int8-bf16 mixed mode for platform that natively supports bf16
+# "--peft_model_id" is used to load PEFT weights from peft_model_id
 python run_clm_no_trainer.py \
     --model decapoda-research/llama-7b-hf \
     --quantize \
     --sq \
     --alpha 0.8 \
     --ipex \
     --output_dir "saved_results" \
-    --int8_bf16_mixed
+    --int8_bf16_mixed \
+    --peft_model_id "peft_model_id"
 ```

 #### Accuracy with lm_eval

examples/huggingface/pytorch/language-modeling/quantization/requirements.txt

Lines changed: 1 addition & 0 deletions
@@ -9,3 +9,4 @@ wandb
 einops
 neural-compressor
 git+https://github.com/EleutherAI/lm-evaluation-harness.git@83dbfbf6070324f3e5872f63e49d49ff7ef4c9b3
+git+https://github.com/huggingface/peft.git@6c44096c7b8d55a2ecf24be9bc68393467e1584a

examples/huggingface/pytorch/language-modeling/quantization/run_clm_no_trainer.py

Lines changed: 5 additions & 0 deletions
@@ -52,6 +52,7 @@
 parser.add_argument("--weight_only_group", type=int, default=-1)
 parser.add_argument("--weight_only_scheme", default="sym")
 parser.add_argument("--weight_only_sym_full_range", action="store_true")
+parser.add_argument("--peft_model_id", type=str, default=None, help="model_name_or_path of peft model")

 args = parser.parse_args()
 if args.ipex:
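The new argument is optional and defaults to None, so existing invocations are unaffected. A minimal, self-contained sketch of just this option (the real run_clm_no_trainer.py defines many more arguments; the adapter id below is a hypothetical placeholder):

```python
import argparse

# Minimal sketch of only the new --peft_model_id option; the real script
# defines many more arguments around it.
parser = argparse.ArgumentParser()
parser.add_argument("--peft_model_id", type=str, default=None,
                    help="model_name_or_path of peft model")

# Without the flag, the default None means PEFT loading is skipped.
args = parser.parse_args([])
print(args.peft_model_id)  # None

# With the flag, the value is later handed to PeftModel.from_pretrained.
# "some-org/some-lora-adapter" is a hypothetical placeholder id.
args = parser.parse_args(["--peft_model_id", "some-org/some-lora-adapter"])
print(args.peft_model_id)  # some-org/some-lora-adapter
```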
@@ -185,6 +186,10 @@ def get_user_model():
     )
     tokenizer = AutoTokenizer.from_pretrained(args.model)

+    if args.peft_model_id is not None:
+        from peft import PeftModel
+        user_model = PeftModel.from_pretrained(user_model, args.peft_model_id)
+
     # to channels last
     user_model = user_model.to(memory_format=torch.channels_last)
     user_model.eval()
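The control flow this hunk adds is a wrap-if-flag-set pattern: the base model is replaced by a PEFT wrapper only when --peft_model_id was supplied. A stand-in sketch, assuming placeholder classes (BaseModel and StubPeftModel substitute for the transformers model and peft.PeftModel, whose from_pretrained actually loads saved adapter weights):

```python
# Sketch of the wrap-if-flag-set pattern from the diff. BaseModel and
# StubPeftModel are stand-ins: the real code wraps a transformers model
# with peft.PeftModel.from_pretrained, which loads adapter weights.
class BaseModel:
    def eval(self):
        return self

class StubPeftModel:
    def __init__(self, base_model, model_id):
        self.base_model = base_model
        self.model_id = model_id

    @classmethod
    def from_pretrained(cls, model, model_id):
        # The real method fetches adapter weights for model_id;
        # the stub just records the wrapping.
        return cls(model, model_id)

    def eval(self):
        return self

def get_user_model(peft_model_id=None):
    user_model = BaseModel()
    # Mirrors the diff: only wrap when --peft_model_id was given.
    if peft_model_id is not None:
        user_model = StubPeftModel.from_pretrained(user_model, peft_model_id)
    user_model.eval()  # the script puts the model in eval mode afterwards
    return user_model

print(type(get_user_model()).__name__)                # BaseModel
print(type(get_user_model("some/adapter")).__name__)  # StubPeftModel
```

Because the wrapper is created only inside the branch, quantization paths that never pass the flag see the unmodified base model.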
