Make the CUTLASS swizzle options configurable and default to 2. (#146088)

masnesral · pytorchmergebot · commit 13e17aa10639 · 2025-02-04T22:07:26.000Z
Pull Request resolved: #146088
Approved by: https://github.com/henrylhtsang, https://github.com/mlazos
diff --git a/torch/_inductor/codegen/cuda/gemm_template.py b/torch/_inductor/codegen/cuda/gemm_template.py
@@ -502,7 +502,7 @@ def _add_cutlass_gemm_choices(
 
         ops = self.gen_ops()
         for name, op in ops:
-            for swizzle in (1, 2, 4, 8):
+            for swizzle in inductor_cuda_config.cutlass_max_profiling_swizzle_options:
                 description = f"{name} swizzle={swizzle}"
                 self.maybe_append_choice(
                     choices, description=description, op=op, swizzle=swizzle
diff --git a/torch/_inductor/config.py b/torch/_inductor/config.py
@@ -1245,6 +1245,9 @@ class cuda:
     # This is mainly used to reduce test time in CI.
     cutlass_max_profiling_configs: Optional[int] = None
 
+    # The L2 swizzle values to consider when profiling CUTLASS configs in max_autotune.
+    cutlass_max_profiling_swizzle_options: list[int] = [2]
+
     # Path to CUDA NVCC.
     # NVCC search order:
     # 1) cuda_cxx set in this config