[https://nvbugs/5485886][fix] Fix resource free of Eagle3ResourceManager (#7437)

kris1025 · web-flow · commit cce95568589e · 2025-09-04T17:38:13.000+08:00
Signed-off-by: linquanh <linquanh@nvidia.com>
diff --git a/tensorrt_llm/_torch/speculative/eagle3.py b/tensorrt_llm/_torch/speculative/eagle3.py
@@ -62,6 +62,9 @@ def update_resources(self, scheduled_batch: ScheduledRequests):
         pass
 
     def free_resources(self, request: LlmRequest):
+        slot_id = self.slot_manager.get_slot(request.request_id)
+        self.seq_lens[slot_id] = 0
+        self.start_indices[slot_id] = 0
         self.slot_manager.remove_slot(request.request_id)
 
     def add_dummy_requests(self, request_ids: List[int]):
diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -2365,7 +2365,7 @@ def test_bf16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
     def test_eagle3(self, enable_chunked_prefill, eagle3_one_model):
         pytorch_config = dict(
             disable_overlap_scheduler=True,
-            cuda_graph_config=CudaGraphConfig(batch_sizes=[1]),
+            cuda_graph_config=CudaGraphConfig(),
         )
         kv_cache_config = KvCacheConfig(
             enable_block_reuse=False,

Original file line number	Diff line number	Diff line change
`@@ -2365,7 +2365,7 @@ def test_bf16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,`
`2365`	`2365`	`def test_eagle3(self, enable_chunked_prefill, eagle3_one_model):`
`2366`	`2366`	`pytorch_config = dict(`
`2367`	`2367`	`disable_overlap_scheduler=True,`
`2368`		`- cuda_graph_config=CudaGraphConfig(batch_sizes=[1]),`
	`2368`	`+ cuda_graph_config=CudaGraphConfig(),`
`2369`	`2369`	`)`
`2370`	`2370`	`kv_cache_config = KvCacheConfig(`
`2371`	`2371`	`enable_block_reuse=False,`