Skip to content

Commit f65fbb5

Browse files
committed
[CUDA] Only use vec128 if CUDA version is 12.8 or newer
1 parent 861d2cc commit f65fbb5

File tree

2 files changed

+5
-3
lines changed

2 files changed

+5
-3
lines changed

aten/src/ATen/native/cuda/CUDALoops.cuh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ constexpr auto sum_of_sizes(args_t args, std::index_sequence<Is...>) {
7878
}
7979
}
8080

81-
#ifdef USE_ROCM
81+
#if defined(USE_ROCM) || (defined(CUDA_VERSION) && CUDA_VERSION < 12080)
8282
template <int io_sizes>
8383
constexpr auto elems_per_thread(){
8484
if constexpr (io_sizes == 1) {
@@ -219,7 +219,7 @@ static inline void launch_vectorized_kernel(
219219
constexpr auto io_size = calc_io_size<func_t>();
220220
int64_t grid = (N + io_block_work_size<io_size>() - 1) / io_block_work_size<io_size>();
221221
auto stream = at::cuda::getCurrentCUDAStream();
222-
#ifdef USE_ROCM
222+
#if defined(USE_ROCM) || (defined(CUDA_VERSION) && CUDA_VERSION < 12080)
223223
int vec_size = memory::can_vectorize_up_to<func_t>(data);
224224
#else
225225
using cpp_type = typename function_traits<func_t>::result_type;

aten/src/ATen/native/cuda/MemoryAccess.cuh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,9 @@ inline C10_HOST_DEVICE int can_vectorize_up_to(const char *pointer) {
486486
uint64_t address = reinterpret_cast<uint64_t>(pointer);
487487
constexpr int vec2_alignment = std::alignment_of_v<aligned_vector<scalar_t, 2>>;
488488
constexpr int vec4_alignment = std::alignment_of_v<aligned_vector<scalar_t, 4>>;
489+
#if defined(USE_ROCM) || (defined(CUDA_VERSION) && CUDA_VERSION >= 12080)
489490
constexpr int vec8_alignment = std::alignment_of_v<aligned_vector<scalar_t, 8>>;
491+
#endif
490492
#ifdef USE_ROCM
491493
constexpr int vec16_alignment = std::alignment_of_v<aligned_vector<scalar_t, 16>>;
492494
constexpr int type_size = sizeof(scalar_t);
@@ -495,7 +497,7 @@ inline C10_HOST_DEVICE int can_vectorize_up_to(const char *pointer) {
495497
} else if (type_size <= 2 && (address % vec8_alignment == 0)) {
496498
return 8;
497499
} else
498-
#else
500+
#elif (defined(CUDA_VERSION) && CUDA_VERSION >= 12080)
499501
if (address % vec8_alignment == 0) {
500502
return 8;
501503
} else

0 commit comments

Comments
 (0)