Skip to content

Commit cce9556

Browse files
authored
[https://nvbugs/5485886][fix] Fix resource free of Eagle3ResourceManager (#7437)
Signed-off-by: linquanh <linquanh@nvidia.com>
1 parent ced5512 commit cce9556

File tree

2 files changed

+4
-1
lines changed

2 files changed

+4
-1
lines changed

tensorrt_llm/_torch/speculative/eagle3.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,9 @@ def update_resources(self, scheduled_batch: ScheduledRequests):
6262
pass
6363

6464
def free_resources(self, request: LlmRequest):
65+
slot_id = self.slot_manager.get_slot(request.request_id)
66+
self.seq_lens[slot_id] = 0
67+
self.start_indices[slot_id] = 0
6568
self.slot_manager.remove_slot(request.request_id)
6669

6770
def add_dummy_requests(self, request_ids: List[int]):

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2365,7 +2365,7 @@ def test_bf16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
23652365
def test_eagle3(self, enable_chunked_prefill, eagle3_one_model):
23662366
pytorch_config = dict(
23672367
disable_overlap_scheduler=True,
2368-
cuda_graph_config=CudaGraphConfig(batch_sizes=[1]),
2368+
cuda_graph_config=CudaGraphConfig(),
23692369
)
23702370
kv_cache_config = KvCacheConfig(
23712371
enable_block_reuse=False,

0 commit comments

Comments
 (0)