Skip to content

Commit 13e17aa

Browse files
masnesral authored and pytorchmergebot committed
Make the CUTLASS swizzle options configurable and default to 2. (#146088)
Pull Request resolved: #146088
Approved by: https://github.com/henrylhtsang, https://github.com/mlazos
1 parent aac0577 commit 13e17aa

File tree

2 files changed

+4
-1
lines changed

2 files changed

+4
-1
lines changed

torch/_inductor/codegen/cuda/gemm_template.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -502,7 +502,7 @@ def _add_cutlass_gemm_choices(
502502

503503
ops = self.gen_ops()
504504
for name, op in ops:
505-
for swizzle in (1, 2, 4, 8):
505+
for swizzle in inductor_cuda_config.cutlass_max_profiling_swizzle_options:
506506
description = f"{name} swizzle={swizzle}"
507507
self.maybe_append_choice(
508508
choices, description=description, op=op, swizzle=swizzle

torch/_inductor/config.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1245,6 +1245,9 @@ class cuda:
12451245
# This is mainly used to reduce test time in CI.
12461246
cutlass_max_profiling_configs: Optional[int] = None
12471247

1248+
# The L2 swizzle values to consider when profiling CUTLASS configs in max_autotune.
1249+
cutlass_max_profiling_swizzle_options: list[int] = [2]
1250+
12481251
# Path to CUDA NVCC.
12491252
# NVCC search order:
12501253
# 1) cuda_cxx set in this config

0 commit comments

Comments
 (0)