Skip to content

Commit b599bb3

Browse files
XiaobingSuper authored and facebook-github-bot committed
Add mkldnn mul operator (#20575)
Summary: ### mkldnn backward ops list: - [ ] \(#20567) Add aten mkldnn conv2d backward operator 💛 - [ ] \(#20570) Add aten mkldnn backward ops: relu, linear and reshape 💛 - [ ] \(#20571) Add aten mkldnn backward ops: max_pool2d, avg_pool2d and adaptive_avg_poo2d 💛 - [ ] \(#20572) Add aten mkldnn batchnorm backward operator 💛 - [ ] \(#20573) Add aten mkldnn zero_ operator 💛 - [ ] \(#20575) Add mkldnn mul operator 💛 Pull Request resolved: #20575 Differential Revision: D15799529 Pulled By: bddppq fbshipit-source-id: 4887d8ef1a0e316ad9db199b657d9481fc13e486
1 parent d3b3cbe commit b599bb3

File tree

3 files changed

+112
-0
lines changed

3 files changed

+112
-0
lines changed

aten/src/ATen/native/mkldnn/BinaryOps.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,18 @@ Tensor& mkldnn_add_(Tensor& self, const Tensor& other, Scalar alpha) {
2323
AT_ERROR("mkldnn_add_: ATen not compiled with MKLDNN support");
2424
}
2525

26+
Tensor& mkldnn_mul_out(Tensor& result, const Tensor& self, const Tensor& other) {
27+
AT_ERROR("mkldnn_mul_out: ATen not compiled with MKLDNN support");
28+
}
29+
30+
Tensor mkldnn_mul(const Tensor& self, const Tensor& other) {
31+
AT_ERROR("mkldnn_mul: ATen not compiled with MKLDNN support");
32+
}
33+
34+
Tensor& mkldnn_mul_(Tensor& self, const Tensor& other) {
35+
AT_ERROR("mkldnn_mul_: ATen not compiled with MKLDNN support");
36+
}
37+
2638
} // namespace native
2739
} // namespace at
2840

@@ -63,6 +75,38 @@ Tensor& mkldnn_add_(Tensor& self, const Tensor& other, Scalar alpha) {
6375
return native::mkldnn_add_out(self, self, other, alpha);
6476
}
6577

78+
Tensor& mkldnn_mul_out(Tensor& result, const Tensor& self, const Tensor& other) {
79+
AT_ASSERTM(result.sizes() == self.sizes(),
80+
"mkldnn_mul_out: the output size should be same as input size");
81+
ideep::tensor& z = itensor_from_mkldnn(result);
82+
ideep::tensor& x = itensor_from_mkldnn(self);
83+
84+
// for zero_dim tensor
85+
if (other.ndimension() == 0) {
86+
ideep::eltwise_forward::compute<AllocForMKLDNN>(
87+
x, z, ideep::algorithm::eltwise_linear,
88+
ideep::prop_kind::forward_inference, /*alpha*/ other.item().to<float>());
89+
90+
return result;
91+
} else {
92+
AT_ASSERTM(self.sizes() == other.sizes(),
93+
"mkldnn_mul_out: currently mkldnn not support broadcasting");
94+
ideep::tensor y = itensor_from_mkldnn(other);
95+
auto op = ideep::eltwise_binary::eltwise_binary_op::ELTWISE_MUL;
96+
ideep::eltwise_binary::compute<AllocForMKLDNN>(op, x, y, z);
97+
98+
return result;
99+
}
100+
}
101+
102+
// Out-of-place elementwise multiply: allocates a fresh MKL-DNN tensor
// shaped like `self` and delegates to mkldnn_mul_out.
Tensor mkldnn_mul(const Tensor& self, const Tensor& other) {
  Tensor output = empty_mkldnn(self.sizes(), self.options());
  return native::mkldnn_mul_out(output, self, other);
}
106+
107+
// In-place elementwise multiply: reuses `self` as the output buffer.
Tensor& mkldnn_mul_(Tensor& self, const Tensor& other) {
  return native::mkldnn_mul_out(self, self, other);
}
66110

67111
} // namespace native
68112
} // namespace at

aten/src/ATen/native/native_functions.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1303,11 +1303,30 @@
13031303

13041304
- func: mul(Tensor self, Tensor other) -> Tensor
13051305
variants: function, method
1306+
dispatch:
1307+
CPU: mul
1308+
CUDA: mul
1309+
SparseCPU: mul
1310+
SparseCUDA: mul
1311+
MkldnnCPU: mkldnn_mul
1312+
13061313

13071314
- func: mul_(Tensor(a!) self, Tensor other) -> Tensor(a!)
13081315
variants: method
1316+
dispatch:
1317+
CPU: mul_
1318+
CUDA: mul_
1319+
SparseCPU: mul_
1320+
SparseCUDA: mul_
1321+
MkldnnCPU: mkldnn_mul_
13091322

13101323
- func: mul(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
1324+
dispatch:
1325+
CPU: mul_out
1326+
CUDA: mul_out
1327+
SparseCPU: mul_out
1328+
SparseCUDA: mul_out
1329+
MkldnnCPU: mkldnn_mul_out
13111330

13121331
# For C++ only, until we have conversion from C++ numbers to Tensor
13131332
- func: mul(Tensor self, Scalar other) -> Tensor

test/test_mkldnn.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,55 @@ def test_add(self):
216216
torch.add(mx, my, alpha=alpha, out=mkldnn_out)
217217
self.assertEqual(out, mkldnn_out.to_dense())
218218

219+
def test_mul(self):
    # Elementwise multiplication must agree between dense CPU tensors and
    # their MKL-DNN counterparts, for tensor*tensor and tensor*scalar,
    # across the out-of-place, in-place, and out= variants.
    batch = torch.randint(3, 10, (1,)).item()
    channels = torch.randint(3, 100, (1,)).item()
    scalar = torch.randn(1, dtype=torch.float32).item()

    x = torch.randn(batch, channels, 35, 45, dtype=torch.float32) * 10
    y = torch.randn(batch, channels, 35, 45, dtype=torch.float32) * 10
    x_mkldnn = x.to_mkldnn()
    y_mkldnn = y.to_mkldnn()

    # mul (out-of-place): operator form and torch.mul form
    self.assertEqual(x * y, (x_mkldnn * y_mkldnn).to_dense())
    self.assertEqual(x * scalar, (x_mkldnn * scalar).to_dense())
    self.assertEqual(torch.mul(x, y), torch.mul(x_mkldnn, y_mkldnn).to_dense())
    self.assertEqual(
        torch.mul(x, scalar), torch.mul(x_mkldnn, scalar).to_dense())

    # mul_ (in-place)
    x *= y
    x_mkldnn *= y_mkldnn
    self.assertEqual(x, x_mkldnn.to_dense())

    x *= scalar
    x_mkldnn *= scalar
    self.assertEqual(x, x_mkldnn.to_dense())

    # mul with an explicit out= tensor
    out = x.clone()
    mkldnn_out = out.to_mkldnn()
    torch.mul(x, y, out=out)
    torch.mul(x_mkldnn, y_mkldnn, out=mkldnn_out)
    self.assertEqual(out, mkldnn_out.to_dense())

    out = x.clone()
    mkldnn_out = out.to_mkldnn()
    torch.mul(x, scalar, out=out)
    torch.mul(x_mkldnn, scalar, out=mkldnn_out)
    self.assertEqual(out, mkldnn_out.to_dense())
267+
219268
def test_view(self):
220269
x = torch.randn(3, 4, 5, dtype=torch.float32).to_mkldnn()
221270
self.assertRaisesRegex(RuntimeError,

0 commit comments

Comments
 (0)