
Commit 60d7473

Update on "[Quant][fx][bc-breaking] Rename fx/*patterns.py"
Summary: This commit renames fx/quantization_patterns.py to fx/quantize_handler.py, and fx/fusion_patterns.py to fx/fuse_handler.py. This is because these files contain only QuantizeHandler and FuseHandler respectively, so the new names are more descriptive. A future commit will further break BC by removing all the empty *QuantizeHandler classes.

BC-breaking notes: The following classes under the `torch.ao.quantization.fx.quantization_patterns` namespace are migrated to the `torch.ao.quantization.fx.quantize_handler` namespace:

```
QuantizeHandler
BinaryOpQuantizeHandler
CatQuantizeHandler
ConvReluQuantizeHandler
LinearReLUQuantizeHandler
BatchNormQuantizeHandler
EmbeddingQuantizeHandler
RNNDynamicQuantizeHandler
DefaultNodeQuantizeHandler
FixedQParamsOpQuantizeHandler
CopyNodeQuantizeHandler
GeneralTensorShapeOpQuantizeHandler
CustomModuleQuantizeHandler
StandaloneModuleQuantizeHandler
```

The following classes under the `torch.ao.quantization.fx.fusion_patterns` namespace are migrated to the `torch.ao.quantization.fx.fuse_handler` namespace:

```
DefaultFuseHandler
FuseHandler
```

Test Plan:
python test/test_quantization.py TestQuantizeFx
python test/test_quantization.py TestQuantizeFxOps

Reviewers: jerryzh168, vkuzo

Subscribers: jerryzh168, vkuzo

cc jerryzh168 jianyuh raghuramank100 jamesr66a vkuzo jgong5 Xia-Weiwen leslie-fang-intel

[ghstack-poisoned]
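For downstream code that imported these handler classes directly, the change described above amounts to an import-path update. A minimal sketch of the migration implied by the BC-breaking notes (the two classes shown are just examples taken from the lists above):

```python
# Before this commit (old module paths, per the BC-breaking note):
# from torch.ao.quantization.fx.quantization_patterns import BinaryOpQuantizeHandler
# from torch.ao.quantization.fx.fusion_patterns import DefaultFuseHandler

# After this commit (new module paths):
from torch.ao.quantization.fx.quantize_handler import BinaryOpQuantizeHandler
from torch.ao.quantization.fx.fuse_handler import DefaultFuseHandler
```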
2 parents a975aee + 3fb977d commit 60d7473

116 files changed: +2126 -1270 lines changed


.github/ci_commit_pins/vision.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-72686211e2a8b78e5a5dc8c28be34eb9cfcdad4c
+a718345a8d60c73a441f6254d6eae456c8a6d787

.github/labeler.yml

Lines changed: 2 additions & 1 deletion
@@ -47,8 +47,9 @@
 "NNC":
 - torch/csrc/jit/tensorexpr/**

-"oncall: quantization":
+"release notes: quantization":
 - torch/ao/quantization/**
 - torch/quantization/**
 - aten/src/ATen/quantized/**
 - aten/src/ATen/native/quantized/cpu/**
+- test/quantization/**

.jenkins/pytorch/test.sh

Lines changed: 2 additions & 0 deletions
@@ -215,6 +215,7 @@ test_dynamo_shard() {
     echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
     exit 1
   fi
+  python tools/dynamo/verify_dynamo.py
   # Temporarily disable test_fx for dynamo pending the investigation on TTS
   # regression in https://github.com/pytorch/torchdynamo/issues/784
   time python test/run_test.py \
@@ -249,6 +250,7 @@ test_inductor_distributed() {
 }

 test_inductor() {
+  python tools/dynamo/verify_dynamo.py
   python test/run_test.py --include test_modules test_ops --verbose
   PYTORCH_TEST_WITH_INDUCTOR=0 python test/run_test.py --include inductor/test_torchinductor --include inductor/test_torchinductor_opinfo --verbose
   # TODO: investigate "RuntimeError: CUDA driver API confirmed a leak"

.lintrunner.toml

Lines changed: 2 additions & 0 deletions
@@ -148,6 +148,7 @@ include_patterns = [
     'torch/_dynamo/convert_frame.py',
     'torch/_dynamo/types.py',
     'torch/_dynamo/output_graph.py',
+    'torch/_dynamo/guards.py',
     'torch/_dynamo/optimizations/__init__.py',
     'torch/_dynamo/optimizations/backends.py',
     'torch/_dynamo/optimizations/training.py',
@@ -779,6 +780,7 @@ include_patterns = [
     'torchgen/**/*.py',
     'functorch/functorch/_src/aot_autograd.py',
     'functorch/functorch/_src/compilers.py',
+    'torch/testing/*.py',
 ]
 command = [
     'python3',

CODEOWNERS

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
 /torch/autograd/ @albanD @soulitzer
 /tools/autograd/ @albanD @soulitzer
 /torch/nn/ @albanD @jbschlosser
-/torch/optim/ @albanD
+/torch/optim/ @albanD @janeyx99
 /test/test_public_bindings.py @albanD
 /test/allowlist_for_publicAPI.json @albanD @anjali411
 /docs/source/conf.py @albanD

MANIFEST.in

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 include MANIFEST.in
 include CMakeLists.txt
-include CITATION
+include CITATION.cff
 include LICENSE
 include NOTICE
 include .gitmodules

aten/src/ATen/Context.cpp

Lines changed: 1 addition & 1 deletion
@@ -332,8 +332,8 @@ const std::vector<at::QEngine>& Context::supportedQEngines() {

 #ifdef USE_FBGEMM
   if (fbgemm::fbgemmSupportedCPU()) {
-    // The X86 qengine is available if and only if FBGEMM is available
     engines.push_back(at::kX86);
+    // The X86 qengine is available if and only if FBGEMM is available
     engines.push_back(at::kFBGEMM);
   }
 #endif

aten/src/ATen/cuda/CUDABlas.cpp

Lines changed: 28 additions & 2 deletions
@@ -741,7 +741,7 @@ void gemm_and_bias(
     TORCH_CUDABLAS_CHECK(CUBLAS_STATUS_NOT_SUPPORTED);
   }

-  TORCH_CUDABLAS_CHECK(cublasLtMatmul(
+  cublasStatus_t cublasStatus = cublasLtMatmul(
       ltHandle,
       computeDesc.descriptor(),
       &alpha_val,
@@ -757,7 +757,33 @@ void gemm_and_bias(
       &heuristicResult.algo,
       workspace.data_ptr(),
       workspaceSize,
-      at::cuda::getCurrentCUDAStream()));
+      at::cuda::getCurrentCUDAStream());
+  TORCH_CHECK(
+      cublasStatus == CUBLAS_STATUS_SUCCESS,
+      "CUDA error: ",
+      at::cuda::blas::_cublasGetErrorEnum(cublasStatus),
+      " when calling cublasLtMatmul with transpose_mat1 ",
+      transpose_mat1,
+      " transpose_mat2 ",
+      transpose_mat2,
+      " m ",
+      m,
+      " n ",
+      n,
+      " k ",
+      k,
+      " mat1_ld ",
+      mat1_ld,
+      " mat2_ld ",
+      mat2_ld,
+      " result_ld ",
+      result_ld,
+      " abcType ",
+      abcType,
+      " computeType ",
+      computeType,
+      " scaleType ",
+      scaleType);
 }

 template void gemm_and_bias(

aten/src/ATen/native/cpu/batch_norm_kernel.cpp

Lines changed: 0 additions & 9 deletions
@@ -789,15 +789,6 @@ void batch_norm_cpu_collect_stats_contiguous_impl<BFloat16>(
   }
 }

-static inline std::tuple<Vectorized<float>, Vectorized<float>> load2f(const BFloat16* ptr) {
-  return convert_bfloat16_float(Vectorized<BFloat16>::loadu(ptr));
-}
-
-static inline std::tuple<Vectorized<float>, Vectorized<float>> load2f(const float* ptr) {
-  using Vec = Vectorized<float>;
-  return std::make_tuple(Vec::loadu(ptr), Vec::loadu(ptr + Vec::size()));
-}
-
 template <typename param_t>
 inline void batch_norm_cpu_collect_stats_channels_last_internal(
     Tensor& mean, Tensor& var_sum, const Tensor& input) {

aten/src/ATen/native/cpu/group_norm_kernel.cpp

Lines changed: 7 additions & 5 deletions
@@ -52,13 +52,15 @@ void GroupNormKernelImplInternal(
   const bool beta_null = beta_data == nullptr;
   const int64_t inner_size = D * HxW;

+  using T_ACC = vec::vec_scalar_t<T>;
+
   at::parallel_for(0, N * G, 1, [&](int64_t start, int64_t end) {
     for (const auto i : c10::irange(start, end)) {
       const T* X_ptr = X_data + i * inner_size;
-      T mean_val;
-      T rstd_val;
+      T_ACC mean_val;
+      T_ACC rstd_val;
       std::tie(mean_val, rstd_val) = RowwiseMoments(X_ptr, inner_size);
-      rstd_val = T(1) / std::sqrt(std::max(rstd_val, T(0)) + eps);
+      rstd_val = T_ACC(1) / std::sqrt(std::max(rstd_val, T_ACC(0)) + eps);
       if (gamma_null && beta_null) {
         T* Y_ptr = Y_data + i * inner_size;
         for (const auto j : c10::irange(inner_size)) {
@@ -68,8 +70,8 @@ void GroupNormKernelImplInternal(
         const int64_t g = i % G;
         for (const auto j : c10::irange(D)) {
           const int64_t c = g * D + j;
-          const T scale = rstd_val * (gamma_null ? T(1) : gamma_data[c]);
-          const T bias = -scale * mean_val + (beta_null ? T(0) : beta_data[c]);
+          const T_ACC scale = rstd_val * (gamma_null ? T(1) : gamma_data[c]);
+          const T_ACC bias = -scale * mean_val + (beta_null ? T(0) : beta_data[c]);
           X_ptr = X_data + (i * D + j) * HxW;
           T* Y_ptr = Y_data + (i * D + j) * HxW;
           for (const auto k : c10::irange(HxW)) {
