Skip to content

Commit b6f542f

Browse files
XiaobingSuper authored and facebook-github-bot committed
Add aten mkldnn transpose (#21943)
Summary: This PR is about: 1. Making mkldnn reshape share the same memory for plain-format tensors. 2. Adding an mkldnn transpose operator. Pull Request resolved: #21943 Differential Revision: D15916063 Pulled By: bddppq fbshipit-source-id: d1971c67341f277c1e80c1fa34e213b6c27f4062
1 parent 3d44cd6 commit b6f542f

File tree

4 files changed

+63
-3
lines changed

4 files changed

+63
-3
lines changed

aten/src/ATen/native/TensorShape.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -624,6 +624,10 @@ Tensor & transpose_(Tensor & self, int64_t dim0, int64_t dim1) {
624624
return sparse_transpose_(self, dim0, dim1);
625625
}
626626

627+
if (self.is_mkldnn()) {
628+
return at::mkldnn_transpose_(self, dim0, dim1);
629+
}
630+
627631
auto strides = self.strides().vec();
628632
auto sizes = self.sizes().vec();
629633
std::swap(strides[dim0], strides[dim1]);
@@ -644,6 +648,10 @@ Tensor transpose(const Tensor & self, int64_t dim0, int64_t dim1) {
644648
return sparse_transpose_(self_clone, dim0, dim1);
645649
}
646650

651+
if (self.is_mkldnn()) {
652+
return at::mkldnn_transpose(self, dim0, dim1);
653+
}
654+
647655
auto strides = self.strides().vec();
648656
auto sizes = self.sizes().vec();
649657
std::swap(strides[dim0], strides[dim1]);

aten/src/ATen/native/mkldnn/TensorShape.cpp

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ Tensor mkldnn_clone(const Tensor& self) {
2020
AT_ERROR("mkldnn_clone: ATen not compiled with MKLDNN support");
2121
}
2222

23+
Tensor mkldnn_transpose(const Tensor& self, int64_t dim0, int64_t dim1) {
24+
AT_ERROR("mkldnn_transpose: ATen not compiled with MKLDNN support");
25+
}
26+
27+
Tensor& mkldnn_transpose_(Tensor& self, int64_t dim0, int64_t dim1) {
28+
AT_ERROR("mkldnn_transpose_: ATen not compiled with MKLDNN support");
29+
}
30+
2331
} // namespace native
2432
} // namespace at
2533

@@ -37,10 +45,12 @@ Tensor mkldnn_view(const Tensor& self, IntArrayRef size) {
3745

3846
Tensor mkldnn_reshape(const Tensor& self, IntArrayRef size) {
3947
auto inferred_size = at::infer_size(size, self.numel());
48+
if (self.sizes() == inferred_size) {
49+
return self;
50+
}
4051
const ideep::tensor& x = itensor_from_mkldnn(self);
41-
ideep::tensor y;
42-
ideep::direct_copy::compute<AllocForMKLDNN>(x, y);
43-
y.reshape({inferred_size.cbegin(), inferred_size.cend()});
52+
ideep::tensor y{x};
53+
y.reshape<AllocForMKLDNN>({inferred_size.cbegin(), inferred_size.cend()});
4454
return new_with_itensor_mkldnn(std::move(y), self.options());
4555
}
4656

@@ -51,6 +61,20 @@ Tensor mkldnn_clone(const Tensor& self) {
5161
return new_with_itensor_mkldnn(std::move(dst), self.options());
5262
}
5363

64+
Tensor mkldnn_transpose(const Tensor & self, int64_t dim0, int64_t dim1) {
65+
const ideep::tensor& x = itensor_from_mkldnn(self);
66+
ideep::tensor y;
67+
std::vector<int> axes(x.ndims());
68+
std::iota(axes.begin(), axes.end(), 0);
69+
std::swap(axes[dim0], axes[dim1]);
70+
y.transpose_from<AllocForMKLDNN>(x, axes);
71+
return new_with_itensor_mkldnn(std::move(y), self.options());
72+
}
73+
74+
Tensor& mkldnn_transpose_(Tensor& self, int64_t dim0, int64_t dim1) {
75+
AT_ERROR("mkldnn_transpose_: in-place mkldnn operations are not supported yet");
76+
}
77+
5478
} // namespace native
5579
} // namespace at
5680

aten/src/ATen/native/native_functions.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1987,10 +1987,22 @@
19871987
variants: function, method
19881988
device_guard: False
19891989

1990+
- func: mkldnn_transpose(Tensor self, int dim0, int dim1) -> Tensor
1991+
device_guard: False
1992+
requires_tensor: True
1993+
dispatch:
1994+
MkldnnCPU: mkldnn_transpose
1995+
19901996
- func: transpose_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!)
19911997
variants: method
19921998
device_guard: False
19931999

2000+
- func: mkldnn_transpose_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!)
2001+
device_guard: False
2002+
requires_tensor: True
2003+
dispatch:
2004+
MkldnnCPU: mkldnn_transpose_
2005+
19942006
- func: one_hot(Tensor self, int num_classes=-1) -> Tensor
19952007
python_module: nn
19962008
variants: function

test/test_mkldnn.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,13 @@ def test_reshape(self):
279279
x.reshape(size),
280280
x.to_mkldnn().reshape(size).to_dense(),
281281
)
282+
# test whether share same memory for plain format tensor
283+
y = x.to_mkldnn()
284+
z = y.reshape(size).add_(y.reshape(size))
285+
self.assertEqual(
286+
y.reshape(size).to_dense(),
287+
z.to_dense(),
288+
)
282289

283290
def test_clone(self):
284291
x = torch.randn(4, 5, dtype=torch.float32) * 10
@@ -294,6 +301,15 @@ def test_clone(self):
294301
z.to_dense(),
295302
)
296303

304+
def test_transpose(self):
305+
x = torch.randn(3, 4, 5, dtype=torch.float32) * 10
306+
for dim1 in range(x.ndim):
307+
for dim2 in range(x.ndim):
308+
self.assertEqual(
309+
x.transpose(dim1, dim2),
310+
x.to_mkldnn().transpose(dim1, dim2).to_dense(),
311+
)
312+
297313
def test_linear(self):
298314
in_features = torch.randint(3, 10, (1,)).item()
299315
out_features = torch.randint(3, 100, (1,)).item()

0 commit comments

Comments
 (0)