Skip to content

Commit 24e11d0

Browse files
committed
Update on "[Quant][core][improvements] Combined dispatch registration for max_pool1d & quantized_max_pool1d"
Summary: This PR is part of a series of PRs addressing #54150, which concerns using the dispatcher for calls to quantized backends instead of if/else conditionals. This particular PR removes the is_quantized check from max_pool1d and modifies max_pool1d_impl to be compatible with int tensors. This PR relies on #72353 and on #74560, the latter of which introduces structured kernel support for quantized tensors. Test plan: ``` python test/test_quantization.py -k test_max_pool1d ``` Differential Revision: [D35431831](https://our.internmc.facebook.com/intern/diff/D35431831) [ghstack-poisoned]
2 parents e15db74 + eba4796 commit 24e11d0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+913
-625
lines changed

aten/src/ATen/NestedTensorImpl.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ NestedTensorImpl::NestedTensorImpl(
2323
buffer.device()),
2424
buffer_(std::move(buffer)),
2525
nested_size_tensor_(std::move(nested_size_tensor)) {
26+
TORCH_WARN_ONCE(
27+
"The PyTorch API of nested tensors is in prototype stage and will change "
28+
"in the near future.");
2629
TORCH_INTERNAL_ASSERT(nested_size_tensor_.is_contiguous());
2730
int64_t size_dim = nested_size_tensor_.dim();
2831
TORCH_INTERNAL_ASSERT(size_dim == 0 || size_dim == 2);

aten/src/ATen/core/interned_strings.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ namespace c10 {
247247
_(onnx, Less) \
248248
_(onnx, LessOrEqual) \
249249
_(onnx, Not) \
250-
_(onnx, ATen) \
250+
_(aten, ATen) \
251251
_(onnx, Split) \
252252
_(onnx, ConstantOfShape) \
253253
_(onnx, Cast) \
@@ -316,7 +316,8 @@ namespace c10 {
316316
_(attr, new_axis) \
317317
_(attr, warn_id) \
318318
_(attr, allowzero) \
319-
_(attr, seen_none)
319+
_(attr, seen_none) \
320+
_(attr, overload_name)
320321

321322
enum class _keys : unique_t {
322323
#define DEFINE_KEY(ns, s) ns##_##s,

aten/src/ATen/native/Blas.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ Tensor dot(const Tensor &self, const Tensor &other){
165165
return r;
166166
}
167167

168-
return AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND(at::ScalarType::BFloat16, self.scalar_type(), "dot", [&] {
168+
return AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND2(at::ScalarType::Half, at::ScalarType::BFloat16, self.scalar_type(), "dot", [&] {
169169
Tensor result = at::empty({}, self.options());
170170
result.fill_(dot_impl<scalar_t>(self.numel(), self.data_ptr<scalar_t>(), self.stride(0), other.data_ptr<scalar_t>(), other.stride(0)));
171171
return result;

aten/src/ATen/native/LinearAlgebra.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,7 +1223,7 @@ static void addmm_impl_cpu_(
12231223
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(!c.is_conj());
12241224

12251225
// Apply BLAS routine
1226-
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND(kBFloat16,
1226+
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND2(kHalf, kBFloat16,
12271227
result.scalar_type(), "addmm_impl_cpu_",
12281228
[&]{
12291229
at::native::cpublas::gemm(
@@ -1428,6 +1428,20 @@ static inline void bmm_out_or_baddbmm_(const Tensor& self_or_result_, const Tens
14281428
// is_bmm_out: true for bmm_out, false for baddbmm_
14291429
// self_or_result is "self" for baddbmm_ and "result" for bmm_out
14301430
Tensor& self_or_result = const_cast<Tensor&>(self_or_result_);
1431+
CheckedFrom c = (is_bmm_out ? "bmm" : "baddbmm");
1432+
1433+
auto checkOnCPU = [](const Tensor& t, CheckedFrom c) {
1434+
TORCH_CHECK(
1435+
!t.is_cuda(),
1436+
"Expect tensor to have CPU backend, but got tensor with ",
1437+
toString(t.options().backend()),
1438+
" Backend (while checking arguments for ",
1439+
c);
1440+
};
1441+
1442+
checkOnCPU(self_or_result, c);
1443+
checkOnCPU(batch1, c);
1444+
checkOnCPU(batch2, c);
14311445

14321446
const auto batch1_sizes = batch1.sizes();
14331447
const auto batch2_sizes = batch2.sizes();
@@ -1464,15 +1478,16 @@ static inline void bmm_out_or_baddbmm_(const Tensor& self_or_result_, const Tens
14641478

14651479
if (contraction_size * res_rows * res_cols < 400) {
14661480
if (is_bmm_out) {
1467-
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND(kBFloat16, batch1.scalar_type(), "bmm", [&] {
1481+
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND2(kHalf, kBFloat16, batch1.scalar_type(), "bmm", [&] {
14681482
baddbmm_cpu_kernel<scalar_t, true>(self_or_result, batch1, batch2, beta, alpha);
14691483
});
14701484
} else {
1471-
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND(kBFloat16, batch1.scalar_type(), "baddbmm", [&] {
1485+
AT_DISPATCH_ALL_TYPES_AND_COMPLEX_AND2(kHalf, kBFloat16, batch1.scalar_type(), "baddbmm", [&] {
14721486
baddbmm_cpu_kernel<scalar_t, false>(self_or_result, batch1, batch2, beta, alpha);
14731487
});
14741488
}
14751489
} else if (at::hasMKL() && ((
1490+
self_or_result.scalar_type() != kHalf &&
14761491
self_or_result.scalar_type() != kBFloat16 &&
14771492
at::native::is_floating_point(self_or_result)) ||
14781493
at::native::is_complex(self_or_result))

aten/src/ATen/native/SoftMax.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <ATen/NamedTensorUtils.h>
1010

1111
#include <c10/core/TensorOptions.h>
12+
#include <c10/macros/Macros.h>
1213
#include <c10/util/irange.h>
1314

1415
namespace at {
@@ -148,7 +149,7 @@ void host_softmax(
148149
int64_t grain_size = std::min(internal::GRAIN_SIZE / dim_size, (int64_t)1);
149150
parallel_for(
150151
0, outer_size * inner_size, grain_size,
151-
[&](int64_t begin, int64_t end) {
152+
[&](int64_t begin, int64_t end) __ubsan_ignore_float_divide_by_zero__ {
152153
for (const auto i : c10::irange(begin, end)) {
153154
int64_t outer_idx = i / inner_size;
154155
int64_t inner_idx = i % inner_size;

aten/src/ATen/native/cuda/PersistentSoftmax.cuh

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,11 @@ __global__ void softmax_warp_forward(output_t *dst, const input_t *src, int batc
167167
elements[i][it] = std::exp(elements[i][it] - max_value[i]);
168168
sum[i] += elements[i][it];
169169
}
170+
} else {
171+
if (!is_log_softmax) {
172+
// Masked values are treated as -infinity, and std::exp(-infinity) is 0.
173+
elements[i][it] = 0;
174+
}
170175
}
171176
}
172177
}
@@ -183,16 +188,6 @@ __global__ void softmax_warp_forward(output_t *dst, const input_t *src, int batc
183188
for (int it = 0; it < WARP_ITERATIONS; ++it) {
184189
int element_index = local_idx + it * WARP_SIZE;
185190
if (element_index < element_count) {
186-
if (is_masked) {
187-
int idx = it*WARP_SIZE;
188-
if (!is_transformer_mask) {
189-
idx += i*element_count;
190-
}
191-
if (mask[idx]) {
192-
dst[i*element_count+it*WARP_SIZE] = 0;
193-
continue;
194-
}
195-
}
196191
if (is_log_softmax) {
197192
dst[i*element_count+it*WARP_SIZE] = elements[i][it] - max_value[i] - sum[i];
198193
} else {

aten/src/ATen/test/basic.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,7 @@ void TestOnesAndDot(DeprecatedTypeProperties& type) {
4141
Tensor b = ones({3, 4}, type);
4242
ASSERT_EQ_RESOLVED((b + b).sum().item<double>(), 24);
4343
ASSERT_EQ_RESOLVED(b.numel(), 12);
44-
if (type.backend() != Backend::CPU || type.scalarType() != kHalf) {
45-
ASSERT_EQ_RESOLVED(b.view(-1).dot(b.view(-1)).item<double>(), 12);
46-
}
44+
ASSERT_EQ_RESOLVED(b.view(-1).dot(b.view(-1)).item<double>(), 12);
4745
}
4846

4947
void TestSort(DeprecatedTypeProperties& type) {

binaries/bench_gen/bench_gen.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def main(args):
5959

6060
if __name__ == "__main__":
6161
parser = argparse.ArgumentParser(
62-
description="Utilitity to generate Caffe2 benchmark models.")
62+
description="Utility to generate Caffe2 benchmark models.")
6363
parser.add_argument("operator", help="Caffe2 operator to benchmark.")
6464
parser.add_argument("-b", "--blob",
6565
help="Instantiate a blob --blob name=dim1,dim2,dim3",

caffe2/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1943,6 +1943,8 @@ if(BUILD_PYTHON)
19431943
# ---[ Python.
19441944
if(BUILD_CAFFE2)
19451945
add_library(caffe2_pybind11_state MODULE ${Caffe2_CPU_PYTHON_SRCS})
1946+
target_compile_definitions(torch PRIVATE BUILD_CAFFE2)
1947+
target_compile_definitions(torch_python PRIVATE BUILD_CAFFE2)
19461948
if(USE_NUMPY)
19471949
target_compile_options(caffe2_pybind11_state PRIVATE "-DUSE_NUMPY")
19481950
target_link_libraries(caffe2_pybind11_state PRIVATE numpy::numpy)

caffe2/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
import warnings
2+
3+
4+
try:
5+
from caffe2.proto import caffe2_pb2
6+
except ImportError:
7+
warnings.warn("Caffe2 was not built with this PyTorch build. "
8+
"Please enable Caffe2 by building PyTorch from source with `BUILD_CAFFE2=1` flag.")

0 commit comments

Comments
 (0)