Skip to content

Commit 1d1e10d

Browse files
committed
review changes
1 parent 33c0508 commit 1d1e10d

File tree

2 files changed

+11
-6
lines changed

2 files changed

+11
-6
lines changed

backends/openvino/quantizer/llm_compression.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def get_calibration_data(
2626
This method is used to obtain calibration data from a prompt so that the algorithm
2727
is calibrated not only with the dataset but also with the inputs which are output by
2828
the model.
29+
Currently, this method is only tested with Llama models.
2930
"""
3031
# TODO: change criteria & support batch inputs if necessary
3132
pos = torch.tensor(0, dtype=torch.int64)
@@ -53,6 +54,7 @@ def get_calibration_data(
5354
def transform_fn(token_pos_map: tuple[int, int]):
5455
"""
5556
Transforms and returns input from dataset so that it is acceptable by the model
57+
Currently, this method is only tested with Llama models.
5658
5759
:param token_pos_map: This input contains the position and its token ID
5860
"""

backends/openvino/quantizer/quantizer.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -134,12 +134,15 @@ def __init__(
134134

135135
def get_weights_compression_config(self) -> Dict[str, Any]:
136136
"""
137-
Returns the config attributes like all_layers, group_size, backup_mode and Quantization mode
138-
from the quantizer's internal algorithm. These attributes are defined when initializing the quantizer
139-
and is used by the compress pt2e's experimental algorithm.
140-
141-
:return: A dictionary containing elements which are defined in the quantizer and their value from the
142-
internal algorithm.
137+
Returns a dictionary with all_layers, group_size, backup_mode and Quantization mode parameters
138+
used by the compress_pt2e weight compression algorithm.
139+
140+
:return: A dictionary containing:
141+
1. mode: Quantization mode. One of INT4 Sym, INT4 Asym, INT8 Sym, INT8 Asym.
142+
2. group_size: group size to be used for group-wise compression.
143+
3. all_layers: Indicates whether embeddings and last MatMul layers should be compressed to a primary
144+
precision. By default, the backup precision is assigned for the embeddings and last MatMul layers.
145+
4. backup_mode: Defines a backup mode for mixed-precision weight compression.
143146
"""
144147
quantizer_initialized_algo_attributes = {
145148
"mode": self._algo.mode,

0 commit comments

Comments
 (0)