11import os
2-
2+ from loguru import logger
33import dotenv
44
55dotenv .load_dotenv ()
1111 'MODEL_NAME' : '' ,
1212 'MODEL_PATH' : '' ,
1313 'ADAPTER_MODEL_PATH' : '' ,
14+
1415 'DEVICE' : 'cuda' ,
1516 'DEVICE_MAP' : "" ,
1617 'GPUS' : '' ,
1718 'NUM_GPUs' : 1 ,
18- 'QUANTIZE' : 16 ,
19+
1920 'EMBEDDING_NAME' : '' ,
20- 'CONTEXT_LEN' : '' ,
21+ 'EMBEDDING_SIZE' : '' ,
22+ 'EMBEDDING_DEVICE' : 'cuda' ,
23+
24+ 'QUANTIZE' : 16 ,
2125 'LOAD_IN_8BIT' : 'False' ,
2226 'LOAD_IN_4BIT' : 'False' ,
2327 'USING_PTUNING_V2' : 'False' ,
28+
29+ 'CONTEXT_LEN' : '' ,
2430 'STREAM_INTERVERL' : 2 ,
2531 'PROMPT_NAME' : '' ,
32+
2633 'PATCH_TYPE' : '' ,
2734 'TRAINING_LENGTH' : 4096 ,
2835 'WINDOW_SIZE' : 512 ,
36+
2937 'API_PREFIX' : '/v1' ,
38+
3039 'USE_VLLM' : 'False' ,
3140 'TRUST_REMOTE_CODE' : "False" ,
3241 'TOKENIZE_MODE' : "auto" ,
3342 'TENSOR_PARALLEL_SIZE' : 1 ,
3443 'DTYPE' : "half" ,
35- 'EMBEDDING_SIZE' : '' ,
36- 'EMBEDDING_DEVICE' : 'cuda' ,
44+ "GPU_MEMORY_UTILIZATION" : 0.9 ,
45+ "MAX_NUM_BATCHED_TOKENS" : 5120 ,
46+ "MAX_NUM_SEQS" : 256 ,
3747}
3848
3949
@@ -61,15 +71,19 @@ def __init__(self):
6171 self .GPUS = get_env ('GPUS' )
6272 self .NUM_GPUs = int (get_env ('NUM_GPUs' ))
6373
64- self .QUANTIZE = int (get_env ('QUANTIZE' ))
6574 self .EMBEDDING_NAME = get_env ('EMBEDDING_NAME' ) if get_env ('EMBEDDING_NAME' ) else None
66- self .CONTEXT_LEN = int (get_env ('CONTEXT_LEN' )) if get_env ('CONTEXT_LEN' ) else None
75+ self .EMBEDDING_SIZE = int (get_env ('EMBEDDING_SIZE' )) if get_env ('EMBEDDING_SIZE' ) else None
76+ self .EMBEDDING_DEVICE = get_env ('EMBEDDING_DEVICE' )
77+
78+ self .QUANTIZE = int (get_env ('QUANTIZE' ))
6779 self .LOAD_IN_8BIT = get_bool_env ('LOAD_IN_8BIT' )
6880 self .LOAD_IN_4BIT = get_bool_env ('LOAD_IN_4BIT' )
6981 self .USING_PTUNING_V2 = get_bool_env ('USING_PTUNING_V2' )
7082
83+ self .CONTEXT_LEN = int (get_env ('CONTEXT_LEN' )) if get_env ('CONTEXT_LEN' ) else None
7184 self .STREAM_INTERVERL = int (get_env ('STREAM_INTERVERL' ))
7285 self .PROMPT_NAME = get_env ('PROMPT_NAME' ) if get_env ('PROMPT_NAME' ) else None
86+
7387 self .PATCH_TYPE = get_env ('PATCH_TYPE' ) if get_env ('PATCH_TYPE' ) else None
7488 self .TRAINING_LENGTH = int (get_env ('TRAINING_LENGTH' ))
7589 self .WINDOW_SIZE = int (get_env ('WINDOW_SIZE' ))
@@ -81,13 +95,13 @@ def __init__(self):
8195 self .TOKENIZE_MODE = get_env ('TOKENIZE_MODE' )
8296 self .TENSOR_PARALLEL_SIZE = int (get_env ('TENSOR_PARALLEL_SIZE' ))
8397 self .DTYPE = get_env ('DTYPE' )
84-
85- self .EMBEDDING_SIZE = int (get_env ('EMBEDDING_SIZE ' )) if get_env ( 'EMBEDDING_SIZE' ) else None
86- self .EMBEDDING_DEVICE = get_env ('EMBEDDING_DEVICE' )
98+ self . GPU_MEMORY_UTILIZATION = float ( get_env ( 'GPU_MEMORY_UTILIZATION' ))
99+ self .MAX_NUM_BATCHED_TOKENS = int (get_env ('MAX_NUM_BATCHED_TOKENS' ))
100+ self .MAX_NUM_SEQS = int ( get_env ('MAX_NUM_SEQS' ) )
87101
88102
89103config = Config ()
90- print (f"Config: { config .__dict__ } " )
104+ logger . debug (f"Config: { config .__dict__ } " )
91105if config .GPUS :
92106 if len (config .GPUS .split ("," )) < config .NUM_GPUs :
93107 raise ValueError (
0 commit comments