[None][fix] Fix is_post_quant_all2all_supported for MNNVL (NVIDIA#8355)

yuantailing · govind-ramnarayan · commit 019cd07fba3c · 2025-10-21T10:30:50.000-07:00
Signed-off-by: Tailing Yuan <yuantailing@gmail.com>
diff --git a/tensorrt_llm/_torch/modules/fused_moe/fused_moe_wide_ep.py b/tensorrt_llm/_torch/modules/fused_moe/fused_moe_wide_ep.py
@@ -445,7 +445,7 @@ def is_post_quant_all2all_supported(self):
         if not self.use_postquant_alltoall:
             return False
         if self.alltoall_method_type == AlltoallMethodType.MNNVL:
-            return False
+            return True
         elif self.alltoall_method_type == AlltoallMethodType.DeepEP:
             return self.has_nvfp4
         elif self.alltoall_method_type == AlltoallMethodType.DeepEPLowLatency: