2 files changed (+3, −2) under intel_extension_for_transformers/neural_chat

@@ -911,7 +911,7 @@ def generate_output():
         max_new_tokens=max_new_tokens,
         ctx_size=max_new_tokens,
         ignore_prompt=True,
-        interactive=True,
+        interactive=False if "magicoder" in model_name.lower() else True,
         do_sample=do_sample,
         num_beams=num_beams,
         n_keep=2 if "chatglm" in model_name.lower() else 1
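Note that `False if "magicoder" in model_name.lower() else True` is equivalent to the more direct `"magicoder" not in model_name.lower()`. A minimal sketch of the resulting behavior, using a hypothetical `resolve_interactive` helper that is not part of the patch:

```python
def resolve_interactive(model_name: str) -> bool:
    # Magicoder models run generation in single-shot (non-interactive)
    # mode; every other model keeps interactive generation enabled,
    # matching the behavior before this patch.
    return "magicoder" not in model_name.lower()

assert resolve_interactive("Magicoder-S-DS-6.7B") is False
assert resolve_interactive("chatglm2-6b") is True
```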
@@ -161,6 +161,7 @@ def init(self, config):
     compute_dtype = yaml_config.get("compute_dtype", {})
     weight_dtype = yaml_config.get("weight_dtype", {})
     use_cached_bin = yaml_config.get("use_cached_bin", {})
+    use_ggml = yaml_config.get("use_ggml", False)
     mix_precision_dtype = yaml_config.get("mix_precision_dtype", {})
     load_in_4bit = yaml_config.get("load_in_4bit", {})
     bnb_4bit_quant_type = yaml_config.get("bnb_4bit_quant_type", {})

@@ -172,7 +173,7 @@ def init(self, config):
     from intel_extension_for_transformers.transformers import WeightOnlyQuantConfig, MixedPrecisionConfig
     if optimization_type == "weight_only":
         optimization_config = WeightOnlyQuantConfig(compute_dtype=compute_dtype, weight_dtype=weight_dtype,
-                                                     use_cache=use_cached_bin)
+                                                     use_ggml=use_ggml, use_cache=use_cached_bin)
     elif optimization_type == "mix_precision":
         optimization_config = MixedPrecisionConfig(dtype=mix_precision_dtype)
     elif optimization_type == "bits_and_bytes":
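Taken together, the second file's change reads an optional `use_ggml` flag from the YAML config and forwards it to `WeightOnlyQuantConfig`. A minimal sketch of how the flag flows through, assuming a config file whose path and flat key layout are hypothetical; only the keys and keyword arguments mirror what the diff itself reads and passes:

```python
import yaml
from intel_extension_for_transformers.transformers import WeightOnlyQuantConfig

# Hypothetical config file; the keys mirror those read in init().
with open("neural_chat_config.yaml") as f:
    yaml_config = yaml.safe_load(f)

# use_ggml defaults to False, so existing configs that never set the
# key keep their previous (non-GGML) behavior.
optimization_config = WeightOnlyQuantConfig(
    compute_dtype=yaml_config.get("compute_dtype", {}),
    weight_dtype=yaml_config.get("weight_dtype", {}),
    use_ggml=yaml_config.get("use_ggml", False),
    use_cache=yaml_config.get("use_cached_bin", {}),
)
```

Defaulting the new key to `False` keeps the change backward compatible: configs written before this patch deserialize exactly as they did before.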