@@ -8,11 +8,12 @@
8 | 8 | from safetensors.torch import load_file |
9 | 9 | from torch import nn |
10 | 10 |
| 11 | +from vllm.config import ModelConfig, VllmConfig |
11 | 12 | from vllm.config.lora import LoRAConfig |
12 | 13 | from vllm.lora.layers import (ColumnParallelLinearWithLoRA, |
13 | 14 | MergedColumnParallelLinearWithLoRA, |
14 | 15 | RowParallelLinearWithLoRA) |
15 | | -from vllm.lora.lora import LoRALayerWeights, PackedLoRALayerWeights |
| 16 | +from vllm.lora.lora_weights import LoRALayerWeights, PackedLoRALayerWeights |
16 | 17 | from vllm.lora.models import (LoRAMapping, LoRAModel, LoRAModelManager, |
17 | 18 | LRUCacheLoRAModelManager) |
18 | 19 | from vllm.lora.peft_helper import PEFTHelper |
@@ -435,10 +436,19 @@ def test_lru_cache_worker_adapter_manager(dist_init, dummy_model, device, |
435 | 436 | target_modules=["layer1.dense1", "dense2"], |
436 | 437 | lora_dtype=DEFAULT_DTYPE, |
437 | 438 | ) |
| 439 | + |
| 440 | + model_config = ModelConfig(max_model_len=16) |
| 441 | + vllm_config = VllmConfig(model_config=model_config, |
| 442 | + lora_config=lora_config) |
| 443 | + |
| 444 | + vllm_config.scheduler_config.max_num_seqs = 4 |
| 445 | + vllm_config.scheduler_config.max_num_batched_tokens = 2 |
438 | 446 | worker_adapter_manager = LRUCacheWorkerLoRAManager( |
439 | | - 4, 2, |
440 | | - dummy_model.unpadded_vocab_size - lora_config.lora_extra_vocab_size, |
441 | | - lora_config, device, EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES) |
| 447 | + vllm_config, device, EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES) |
| 448 | + |
| 449 | + worker_adapter_manager.max_num_seqs = 4 |
| 450 | + worker_adapter_manager.max_num_batched_tokens = 2 |
| 451 | + |
442 | 452 | worker_adapter_manager.create_lora_manager(dummy_model) |
443 | 453 |
444 | 454 | mapping = LoRAMapping([], []) |
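For reference, this hunk replaces the old positional arguments (`max_num_seqs`, `max_num_batched_tokens`, vocab size, `lora_config`) with a single `VllmConfig`. Below is a minimal sketch of the new construction path, mirroring the test above; the `LoRAConfig` field values, the `torch.float16` dtype, and the `EMBEDDING_MODULES`/`EMBEDDING_PADDING_MODULES` stand-ins are assumptions for illustration, not part of this diff:

```python
import torch

from vllm.config import ModelConfig, VllmConfig
from vllm.config.lora import LoRAConfig
from vllm.lora.worker_manager import LRUCacheWorkerLoRAManager

# Assumed stand-ins for the test-local constants (not shown in this diff).
EMBEDDING_MODULES = {"embed_tokens": "input_embeddings",
                     "lm_head": "output_embeddings"}
EMBEDDING_PADDING_MODULES = ["lm_head"]

# LoRA settings roughly matching the test; DEFAULT_DTYPE is assumed to be fp16.
lora_config = LoRAConfig(max_lora_rank=8,
                         max_cpu_loras=4,
                         max_loras=4,
                         lora_dtype=torch.float16)

# Model and LoRA settings are now bundled into a single VllmConfig ...
vllm_config = VllmConfig(model_config=ModelConfig(max_model_len=16),
                         lora_config=lora_config)
# ... and the scheduler limits that used to be positional arguments are set
# on scheduler_config instead.
vllm_config.scheduler_config.max_num_seqs = 4
vllm_config.scheduler_config.max_num_batched_tokens = 2

manager = LRUCacheWorkerLoRAManager(vllm_config, torch.device("cuda:0"),
                                    EMBEDDING_MODULES,
                                    EMBEDDING_PADDING_MODULES)
# manager.create_lora_manager(model)  # `model` is the wrapped nn.Module
```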
@@ -517,10 +527,20 @@ def test_worker_adapter_manager(dist_init, dummy_model_gate_up, device, |
517 | 527 | max_cpu_loras=4, |
518 | 528 | max_loras=4, |
519 | 529 | lora_dtype=DEFAULT_DTYPE) |
520 | | - worker_adapter_manager = WorkerLoRAManager( |
521 | | - 4, 2, dummy_model_gate_up.unpadded_vocab_size - |
522 | | - lora_config.lora_extra_vocab_size, lora_config, device, |
523 | | - EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES) |
| 530 | + |
| 531 | + model_config = ModelConfig(max_model_len=16) |
| 532 | + vllm_config = VllmConfig(model_config=model_config, |
| 533 | + lora_config=lora_config) |
| 534 | + |
| 535 | + vllm_config.scheduler_config.max_num_seqs = 4 |
| 536 | + vllm_config.scheduler_config.max_num_batched_tokens = 2 |
| 537 | + |
| 538 | + worker_adapter_manager = WorkerLoRAManager(vllm_config, device, |
| 539 | + EMBEDDING_MODULES, |
| 540 | + EMBEDDING_PADDING_MODULES) |
| 541 | + worker_adapter_manager.vocab_size = ( |
| 542 | + dummy_model_gate_up.unpadded_vocab_size - |
| 543 | + lora_config.lora_extra_vocab_size) |
524 | 544 | worker_adapter_manager.create_lora_manager(dummy_model_gate_up) |
525 | 545 |
526 | 546 | dummy_lora_files = f"{tmp_path}/lora_adapter" |
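The second hunk follows the same pattern for the plain `WorkerLoRAManager`; the only extra step is that the unpadded vocabulary size, formerly folded into the positional arguments, is now assigned on the manager after construction. A hedged continuation of the sketch above (`vllm_config`, `lora_config`, and the embedding constants are reused; `unpadded_vocab_size` is a placeholder value, not taken from this diff):

```python
from vllm.lora.worker_manager import WorkerLoRAManager

# Placeholder for the wrapped model's unpadded vocabulary size.
unpadded_vocab_size = 32_000

manager = WorkerLoRAManager(vllm_config, torch.device("cuda:0"),
                            EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES)
# Previously part of the constructor arguments; now set as an attribute
# before building the LoRA manager, as the test above does.
manager.vocab_size = unpadded_vocab_size - lora_config.lora_extra_vocab_size
# manager.create_lora_manager(model)  # `model` is the wrapped nn.Module
```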