
Commit f9c3c37

Add quantized CELU operator by adding additional parameters to quantized ELU
ghstack-source-id: f2649e0
Pull Request resolved: #39199

Updated ELU to accept additional parameters
ghstack-source-id: f2649e0
Pull Request resolved: #39200

Added tests
ghstack-source-id: f2649e0
Pull Request resolved: #39201

Improved tests to fail when formula is wrong
ghstack-source-id: f2649e0
Pull Request resolved: #39202
1 parent 016cf7d commit f9c3c37

File tree: 10 files changed, +89 −6 lines

aten/src/ATen/native/Activation.cpp

Lines changed: 4 additions & 0 deletions
@@ -181,11 +181,15 @@ Tensor & selu_(Tensor & self) {
 }
 
 Tensor celu(const Tensor & self, Scalar alpha) {
+  TORCH_CHECK(alpha.to<double>() != 0,
+              "ZeroDivisionError: alpha cannot be 0 for CELU");
   double inv_alpha = 1. / alpha.to<double>();
   return at::elu(self, alpha, Scalar(1.0), Scalar(inv_alpha));
 }
 
 Tensor & celu_(Tensor & self, Scalar alpha) {
+  TORCH_CHECK(alpha.to<double>() != 0,
+              "ZeroDivisionError: alpha cannot be 0 for CELU");
   double inv_alpha = 1. / alpha.to<double>();
   return at::elu_(self, alpha, Scalar(1.0), Scalar(inv_alpha));
 }
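For reference, the reduction above follows directly from the CELU definition, CELU(x, alpha) = max(0, x) + min(0, alpha * (exp(x / alpha) - 1)), which is exactly the generalized ELU with scale = 1 and input_scale = 1 / alpha; that is also why alpha = 0 must be rejected. A minimal Python sketch of the equivalence (illustrative only, not part of this commit; the helper name generalized_elu is made up):

import torch

def generalized_elu(x, alpha=1.0, scale=1.0, input_scale=1.0):
    # Generalized ELU as described by the kernel comment in the next file:
    #   x >= 0: x * scale
    #   x <  0: alpha * (exp(x * input_scale) - 1) * scale
    return torch.where(x >= 0,
                       x * scale,
                       alpha * (torch.exp(x * input_scale) - 1) * scale)

x = torch.randn(1000)
alpha = 0.7
# CELU(x, alpha) matches the generalized ELU with scale=1 and input_scale=1/alpha.
assert torch.allclose(torch.celu(x, alpha), generalized_elu(x, alpha, 1.0, 1.0 / alpha))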

aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp

Lines changed: 21 additions & 3 deletions
@@ -793,7 +793,17 @@ void qtanh_kernel(const Tensor& qx, Tensor& qy) {
   });
 }
 
-void qelu_kernel(const Tensor& qx, Scalar alpha, Tensor& qy) {
+void qelu_kernel(
+    const Tensor& qx,
+    Scalar alpha,
+    Scalar scale,
+    Scalar input_scale,
+    Tensor& qy) {
+  // scale and input_scale arguments refer to a generalized ELU formula
+  // if x >= 0, ELU(x) = x * scale
+  // if x <= 0, ELU(x) = (exp(x * input_scale) - 1) * scale
+  // in the normal ELU formula, both are equal to 1
+  // they are NOT related to the quantization scale term
 
   int64_t i_zp = qx.q_zero_point();
   float i_scale = qx.q_scale();
@@ -805,6 +815,8 @@ void qelu_kernel(const Tensor& qx, Scalar alpha, Tensor& qy) {
   float inv_o_scale = 1.0 / o_scale;
 
   float alpha_float = alpha.to<float>();
+  float scale_coef = scale.to<float>();
+  float input_scale_coef = input_scale.to<float>();
 
   AT_DISPATCH_QINT_TYPES(qx.scalar_type(), "qelu_kernel", [&] {
 
@@ -817,6 +829,8 @@ void qelu_kernel(const Tensor& qx, Scalar alpha, Tensor& qy) {
     Vec zero_vec = Vec(0.0f);
     Vec one_vec = Vec(1.0f);
     Vec alpha_vec = Vec(alpha_float);
+    Vec scale_coef_vec = Vec(scale_coef);
+    Vec input_scale_coef_vec = Vec(input_scale_coef);
     Vec i_scale_vec = Vec(i_scale);
     Vec i_zero_point_vec = Vec((float)i_zp);
     Vec i_scale_neg_zp_premul_vec = i_scale_vec * i_zero_point_vec.neg();
@@ -828,8 +842,9 @@ void qelu_kernel(const Tensor& qx, Scalar alpha, Tensor& qy) {
        const auto x = at::native::dequantize_val(i_scale, i_zp, value_qx);
        // ELU
        const auto y = x >= 0
-          ? x
-          : (alpha_float * (std::exp(x) - 1));
+          ? x * scale_coef
+          : ((std::exp(x * input_scale_coef) - 1) * alpha_float * scale_coef);
+
        // quantize
        return at::native::quantize_val<scalar_t>(o_scale, o_zp, y);
      },
@@ -846,13 +861,16 @@ void qelu_kernel(const Tensor& qx, Scalar alpha, Tensor& qy) {
 
        Vec dx_vec_copy_neg_elu = dx_vec_vec[idx] * one_vec;
        // calculate the negative part of ELU on the copy
+       dx_vec_copy_neg_elu = dx_vec_copy_neg_elu * input_scale_coef_vec;
        dx_vec_copy_neg_elu = dx_vec_copy_neg_elu.exp();
        dx_vec_copy_neg_elu = dx_vec_copy_neg_elu - one_vec;
        dx_vec_copy_neg_elu = dx_vec_copy_neg_elu * alpha_vec;
        // blend
        dx_vec_vec[idx] = Vec::blendv(dx_vec_copy_neg_elu, dx_vec_vec[idx],
                                      dx_vec_vec[idx] > zero_vec);
      }
+
+     dx_vec_vec[idx] = dx_vec_vec[idx] * scale_coef_vec;
    }
    // quantize
    return qVec::quantize(dx_vec_vec, o_scale, o_zp, inv_o_scale);
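The scalar path of this kernel amounts to: dequantize, apply the generalized ELU, requantize with the caller-supplied output quantization parameters. A plain-Python reference of that flow can help when sanity-checking the vectorized path; this is an illustrative sketch, not code from the commit, and the qelu_reference name and qparams are made up:

import torch

def qelu_reference(qx, output_scale, output_zero_point,
                   alpha=1.0, scale=1.0, input_scale=1.0):
    # Mirror the kernel: dequantize, generalized ELU, requantize.
    x = qx.dequantize()
    y = torch.where(x >= 0,
                    x * scale,
                    (torch.exp(x * input_scale) - 1) * alpha * scale)
    return torch.quantize_per_tensor(y, output_scale, output_zero_point, qx.dtype)

qx = torch.quantize_per_tensor(torch.randn(16), scale=0.05, zero_point=64,
                               dtype=torch.quint8)
qy = qelu_reference(qx, output_scale=0.05, output_zero_point=64, alpha=1.0)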

aten/src/ATen/native/quantized/cpu/qelu.cpp

Lines changed: 10 additions & 3 deletions
@@ -11,14 +11,21 @@ DEFINE_DISPATCH(qelu_stub);
 
 Tensor quantized_elu(
     const Tensor& qx, double output_scale, int64_t output_zero_point, Scalar alpha, Scalar scale, Scalar input_scale) {
-  Tensor qy = at::_empty_affine_quantized(qx.sizes(), qx.options(),
-      output_scale, output_zero_point);
-  qelu_stub(qx.device().type(), qx, alpha, qy);
+  Tensor qy = at::_empty_affine_quantized(qx.sizes(), qx.options(), output_scale, output_zero_point);
+  qelu_stub(qx.device().type(), qx, alpha, scale, input_scale, qy);
   return qy;
 }
 
+Tensor quantized_celu(const Tensor& qx, double output_scale, int64_t output_zero_point, Scalar alpha) {
+  TORCH_CHECK(alpha.to<double>() != 0,
+              "ZeroDivisionError: alpha cannot be 0 for CELU");
+  double inv_alpha = 1. / alpha.to<double>();
+  return quantized_elu(qx, output_scale, output_zero_point, alpha, Scalar(1.0), Scalar(inv_alpha));
+}
+
 TORCH_LIBRARY_IMPL(quantized, QuantizedCPU, m) {
   m.impl("elu", quantized_elu);
+  m.impl("celu", quantized_celu);
 }
 
 }} // namespace at::native

aten/src/ATen/native/quantized/cpu/quantized_ops.h

Lines changed: 2 additions & 0 deletions
@@ -24,6 +24,8 @@ using qtanh_fn = void (*)(const at::Tensor& /*qx*/, at::Tensor& /*qy*/);
 using qelu_fn = void(*)(
     const at::Tensor& /*qx*/,
     Scalar /*alpha*/,
+    Scalar /*scale*/,
+    Scalar /*input_scale*/,
     at::Tensor& /*qy*/);
 using qbinary_fn =
     void (*)(Tensor& /*out*/, const Tensor& /*self*/, const Tensor& /*other*/);

aten/src/ATen/native/quantized/library.cpp

Lines changed: 1 addition & 0 deletions
@@ -77,6 +77,7 @@ TORCH_LIBRARY(quantized, m) {
   m.def("conv3d_dilation(__torch__.torch.classes.quantized.Conv3dPackedParamsBase packed_weights) -> int[]");
   m.def("conv3d_groups(__torch__.torch.classes.quantized.Conv3dPackedParamsBase packed_weights) -> int");
   m.def("elu(Tensor self, float output_scale, int output_zero_point, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor");
+  m.def("celu(Tensor self, float output_scale, int output_zero_point, Scalar alpha=1) -> Tensor");
   m.def("hardswish(Tensor input, float output_scale, int output_zero_point) -> Tensor");
   m.def("group_norm(Tensor input, int num_groups, Tensor? weight, Tensor? bias, float eps, float output_scale, int output_zero_point) -> Tensor");
   m.def("instance_norm(Tensor input, Tensor? weight, Tensor? bias, float eps, float output_scale, int output_zero_point) -> Tensor");

benchmarks/operator_benchmark/pt/qactivation_test.py

Lines changed: 1 addition & 0 deletions
@@ -52,6 +52,7 @@
     ('functional.hardtanh', nnq.functional.hardtanh),
     ('functional.hardswish', nnq.functional.hardswish),
     ('functional.elu', nnq.functional.elu),
+    ('functional.celu', nnq.functional.celu),
     ('functional.hardsigmoid', nnq.functional.hardsigmoid),
     ('functional.leaky_relu', nnq.functional.leaky_relu),
     ('functional.sigmoid', torch.nn.functional.sigmoid),

test/quantization/test_quantized_op.py

Lines changed: 27 additions & 0 deletions
@@ -324,6 +324,33 @@ def test_qelu(self, X, alpha):
         self.assertEqual(qY, qY_hat,
                          msg="F.elu failed ({} vs {})".format(qY, qY_hat))
 
+
+    """Tests the correctness of the quantized::celu op."""
+    @given(X=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
+                       elements=hu.floats(-1e2, 1e2, allow_nan=False, allow_infinity=False),
+                       qparams=hu.qparams(scale_max=9.999999747378752e-06)),
+           alpha=st.floats(0.01, 100.0, allow_nan=False, allow_infinity=False))
+    def test_qcelu(self, X, alpha):
+        X, (scale, zero_point, torch_type) = X
+
+        X = torch.from_numpy(X)
+        qX = torch.quantize_per_tensor(X, scale=scale, zero_point=zero_point,
+                                       dtype=torch_type)
+        op = torch.nn.quantized.functional.celu
+
+        # calculate CELU(dqX) and quantize
+        dqX = qX.dequantize()
+        dqY_hat = dqX.clone()
+        dqY_hat[dqX < 0] = alpha * (torch.exp(dqY_hat[dqX < 0] / alpha) - 1.)
+        qY_hat = torch.quantize_per_tensor(dqY_hat, scale=scale, zero_point=zero_point,
+                                           dtype=torch_type)
+
+        # test regular
+        qY = op(qX, alpha=alpha)
+        self.assertEqual(qY, qY_hat,
+                         msg="F.celu failed ({} vs {})".format(qY, qY_hat))
+
+
     """Tests the correctness of the quantized::qlayer_norm op."""
     @skipIfNoFBGEMM
     def test_qlayer_norm(self):

tools/autograd/derivatives.yaml

Lines changed: 3 additions & 0 deletions
@@ -1178,6 +1178,9 @@
 - name: elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor
   self: elu_backward(grad, alpha, scale, input_scale, result)
 
+- name: celu(Tensor self, Scalar alpha=1.0) -> Tensor
+  self: elu_backward(grad, alpha, 1, 1.0/alpha.toFloat(), result)
+
 - name: gelu(Tensor self) -> Tensor
   self: "GradMode::is_enabled() ? infinitely_differentiable_gelu_backward(grad, self) : gelu_backward(grad, self)"
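The backward entry reuses elu_backward with scale = 1 and input_scale = 1/alpha, which corresponds to dCELU/dx = 1 for x > 0 and exp(x / alpha) for x <= 0. A quick autograd check of that formula (illustrative only, not part of the commit):

import torch

alpha = 1.5
x = torch.randn(100, dtype=torch.double, requires_grad=True)
torch.celu(x, alpha).sum().backward()

# dCELU/dx = 1 for x > 0, exp(x / alpha) for x <= 0.
expected = torch.where(x > 0, torch.ones_like(x), torch.exp(x / alpha))
assert torch.allclose(x.grad, expected.detach())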

torch/csrc/jit/passes/quantization/helper.cpp

Lines changed: 3 additions & 0 deletions
@@ -21,6 +21,7 @@ std::vector<std::string> _static_quantizable_call_funcs = {
     "batch_norm",
     "hardswish",
     "elu",
+    "celu",
     "layer_norm",
     "group_norm",
     "instance_norm",
@@ -37,6 +38,8 @@ std::vector<std::string> _static_quantizable_aten_funcs = {
     "hardswish_",
     "elu",
     "elu_",
+    "celu",
+    "celu_",
     "batch_norm",
     "layer_norm",
     "group_norm",

torch/nn/quantized/functional.py

Lines changed: 17 additions & 0 deletions
@@ -362,6 +362,23 @@ def max_pool2d(input, kernel_size, stride=None, padding=0, dilation=1,
     return torch.nn.functional.max_pool2d(input, kernel_size, stride, padding,
                                           dilation, ceil_mode, return_indices)
 
+def celu(input, alpha=1.):
+    # type: (Tensor, float) -> Tensor
+    r"""celu(input, alpha=1.) -> Tensor
+
+    Applies the quantized CELU function element-wise.
+    .. math::
+        \text{CELU}(x) = \max(0,x) + \min(0, \alpha * (\exp(x / \alpha) - 1))
+
+    Args:
+        input: quantized input
+        alpha: the :math:`\alpha` value for the CELU formulation. Default: 1.0
+    """
+    if not input.is_quantized:
+        raise ValueError("Input to 'quantized.celu' must be quantized!")
+    return torch.celu(input, alpha)
+
+
 def relu(input, inplace=False):
     # type: (Tensor, bool) -> Tensor
     r"""relu(input, inplace=False) -> Tensor
