
Commit bd7890b

pearu authored and facebook-github-bot committed
Support copy_ for Sparse Compressed tensors. (#77605)
Summary: Pull Request resolved: #77605
Approved by: https://github.com/cpuhrsch
Test Plan: contbuild & OSS CI, see https://hud.pytorch.org/commit/pytorch/pytorch/8b5f11c61eecd58214c631056a634f2eedc6455a
Reviewed By: seemethere
Differential Revision: D36494385
Pulled By: seemethere
fbshipit-source-id: 103bffbddfecce3aaa728f06c8f5c2d16f0a0667
1 parent c1c1e32 commit bd7890b
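
For orientation, a minimal sketch of what the change enables at the Python level, mirroring the new test_copy test further below; the tensors are illustrative, not taken from the PR, and a build containing this commit is assumed:

import torch

# Two CSR tensors with the same sparsity pattern, shape, and number of specified elements.
crow_indices = torch.tensor([0, 2, 4])
col_indices = torch.tensor([0, 1, 0, 1])
src = torch.sparse_csr_tensor(crow_indices, col_indices, torch.tensor([1., 2., 3., 4.]), size=(2, 2))
dst = torch.sparse_csr_tensor(crow_indices, col_indices, torch.zeros(4), size=(2, 2))

# copy_ now dispatches to copy_sparse_compressed_ for sparse compressed layouts,
# copying the index tensors and the values of src into dst in place.
dst.copy_(src)
assert torch.equal(dst.to_dense(), src.to_dense())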

5 files changed: 109 additions & 48 deletions


aten/src/ATen/native/native_functions.yaml

Lines changed: 1 addition & 1 deletion
@@ -1419,7 +1419,7 @@
     MkldnnCPU: copy_mkldnn_
     SparseCPU, SparseCUDA: copy_sparse_wrapper_
     CompositeExplicitAutograd: copy_
-    SparseCsrCPU, SparseCsrCUDA: copy_sparse_csr_
+    SparseCsrCPU, SparseCsrCUDA: copy_sparse_compressed_

 - func: _copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor
   dispatch:
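
The SparseCsrCPU/SparseCsrCUDA dispatch keys cover all four compressed layouts (CSR, CSC, BSR, BSC), so the renamed kernel also serves blocked tensors. A hedged sketch, assuming the torch.sparse_bsr_tensor factory is available in your build:

import torch

# A 4x6 BSR tensor with 2x3 blocks: values has shape (nnz, block_rows, block_cols).
crow_indices = torch.tensor([0, 1, 2])
col_indices = torch.tensor([0, 1])
src = torch.sparse_bsr_tensor(crow_indices, col_indices, torch.ones(2, 2, 3), size=(4, 6))
dst = torch.sparse_bsr_tensor(crow_indices, col_indices, torch.zeros(2, 2, 3), size=(4, 6))

dst.copy_(src)  # routed through the same copy_sparse_compressed_ kernel
assert torch.equal(dst.to_dense(), src.to_dense())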

aten/src/ATen/native/sparse/SparseCsrTensor.cpp

Lines changed: 46 additions & 12 deletions
@@ -170,7 +170,7 @@ void _validate_sparse_compressed_tensor_args_worker(const Tensor& compressed_ind
     Tensor compressed_indices_cpu = compressed_indices.to(kCPU);
     auto compressed_indices_data_ptr = compressed_indices_cpu.data_ptr<index_t>();
     auto batch_stride = compressed_indices_cpu.dim() >= 2 ? compressed_indices_cpu.stride(-2) : 0;
-
+    auto compressed_dims = size[compressedDimension(layout, size)];
     for (const auto batch_id : c10::irange(batchCount(compressed_indices_cpu))) {
       TORCH_CHECK(
           compressed_indices_data_ptr[batch_id*batch_stride] == 0,

@@ -180,7 +180,7 @@ void _validate_sparse_compressed_tensor_args_worker(const Tensor& compressed_ind
           compressed_indices_data_ptr[batch_id*batch_stride + compressed_indices.size(-1) - 1] == plain_indices.size(-1),
           "(Batch element ", batch_id, ") ",
           "last value of ", compressed_indices_name, " should be equal to the length of ", plain_indices_name, ".");
-      for (int i = 1; i <= size[size.size() - 2]; i++) {
+      for (int i = 1; i <= compressed_dims; i++) {
         TORCH_CHECK(
             compressed_indices_data_ptr[batch_id*batch_stride + i - 1] <= compressed_indices_data_ptr[batch_id*batch_stride + i],
             "(Batch element ", batch_id, ") ",

@@ -513,18 +513,52 @@ const Tensor& resize_sparse_csr_(
   return self;
 }

-Tensor& copy_sparse_csr_(Tensor& self, const Tensor& src, bool non_blocking) {
+Tensor& copy_sparse_compressed_(Tensor& self, const Tensor& src, bool non_blocking) {
+  AT_DISPATCH_ALL_SPARSE_COMPRESSED_LAYOUTS(self.layout(), "copy_sparse_compressed_", [&]{});
   TORCH_CHECK(
-      self.is_sparse_csr() && src.is_sparse_csr(),
-      "copy_sparse_csr_: copy between different layouts is not supported. Found self type = ",
-      self.toString(),
-      " and src type = ",
-      src.toString());
+      self.layout() == src.layout(),
+      "torch.copy_: copy of sparse compressed tensors having different layouts is not supported.",
+      " self layout is ", self.layout(), " and src layout is ", src.layout());
   TORCH_CHECK(
-      self._nnz() == src._nnz(),
-      "copy_sparse_csr_: only tensors with the same number of specified elements are supported.");
-  self.crow_indices().copy_(src.crow_indices(), non_blocking);
-  self.col_indices().copy_(src.col_indices(), non_blocking);
+      self._nnz() == src._nnz(), // actually, values copy allows different shapes as long as operands are broadcastable
+      "torch.copy_: only sparse compressed tensors with the same number of specified elements are supported.");
+  auto self_compressed_dim = compressedDimension(self.layout(), self.sizes());
+  auto src_compressed_dim = compressedDimension(src.layout(), src.sizes());
+  auto self_compressed_dims = self.size(self_compressed_dim);
+  auto src_compressed_dims = src.size(compressedDimension(src.layout(), src.sizes()));
+  if (self_compressed_dim == src_compressed_dim) {
+    TORCH_CHECK(self_compressed_dims == src_compressed_dims,
+                "torch.copy_: expected shapes of self and src to match along dimension ",
+                self_compressed_dim, " for ",
+                self.layout(), " layout but the corresponding dimensions of self and src are ",
+                self_compressed_dims, " and ", src_compressed_dims, ", respecitvely.");
+  } else {
+    TORCH_CHECK(self_compressed_dims == src_compressed_dims,
+                "torch.copy_: expected shapes of self and src to match along dimensions ",
+                self_compressed_dim, " and ", src_compressed_dim, ", respectively, for ",
+                self.layout(), " layout but the corresponding dimensions of self and src are ",
+                self_compressed_dims, " and ", src_compressed_dims, ", respecitvely.");
+  }
+  AT_DISPATCH_PLAIN_SPARSE_COMPRESSED_LAYOUTS(self.layout(), "copy_sparse_compressed_",
+      [&]{},
+      [&]{
+        auto self_values = self.values();
+        auto src_values = src.values();
+        auto self_block_size = DimVector(self_values.sizes().slice(self_values.dim()-2, 2));
+        auto src_block_size = DimVector(src_values.sizes().slice(src_values.dim()-2, 2));
+        TORCH_CHECK(self_block_size == src_block_size,
+                    "torch.copy_: copy of sparse compressed tensors having different block sizes is not supported.",
+                    " self and src block sizes are ", self_block_size, " and ", src_block_size, ", respectivly.");
+      });
+  AT_DISPATCH_ROW_SPARSE_COMPRESSED_LAYOUTS(self.layout(), "copy_sparse_compressed_",
+      [&]{
+        self.crow_indices().copy_(src.crow_indices(), non_blocking);
+        self.col_indices().copy_(src.col_indices(), non_blocking);
+      },
+      [&]{
+        self.ccol_indices().copy_(src.ccol_indices(), non_blocking);
+        self.row_indices().copy_(src.row_indices(), non_blocking);
+      });
   self.values().copy_(src.values(), non_blocking);
   return self;
 }
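
The new checks surface as RuntimeError on the Python side; the shape check, for example, fires when the compressed dimensions of self and src disagree, which test_copy_errors below also exercises. A small illustrative sketch (the tensors are not taken from the PR):

import torch

values = torch.tensor([1., 2.])
# A 2x3 CSR tensor with 2 specified elements.
a = torch.sparse_csr_tensor(torch.tensor([0, 1, 2]), torch.tensor([0, 2]), values, size=(2, 3))
# A 3x2 CSR tensor, also with 2 specified elements, so only the shape check fires.
b = torch.sparse_csr_tensor(torch.tensor([0, 1, 1, 2]), torch.tensor([0, 1]), values.clone(), size=(3, 2))

try:
    a.copy_(b)
except RuntimeError as e:
    print(e)  # "... expected shapes of self and src to match along dimension ..."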

aten/src/ATen/native/sparse/SparseCsrTensorMath.cpp

Lines changed: 2 additions & 2 deletions
@@ -148,9 +148,9 @@ Tensor& unary_op_out(F op_out, const Tensor& self, Tensor& result) {
     if (result.numel() == 0) {
       at::native::resize_as_sparse_csr_(result, self);
     }
-    // copy_sparse_csr_ internally checks the sizes of result and self tensors
+    // copy_sparse_compressed_ internally checks the sizes of result and self tensors
     // Hence no external size check required
-    at::native::copy_sparse_csr_(result, self);
+    at::native::copy_sparse_compressed_(result, self);
   }

   auto self_values = self.values();
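
The unary_op_out helper appears to back the out= variants of elementwise ops on CSR tensors, so those now funnel through the renamed copy as well. A hedged sketch, assuming torch.abs registers such an out= overload for CSR inputs in this build:

import torch

a = torch.tensor([[-1., 0.], [0., 2.]]).to_sparse_csr()
out = a.clone()        # same layout, shape, and nnz as a

# Per the snippet above, the helper resizes out if it is empty and then
# copies a into out via copy_sparse_compressed_ before applying the op.
torch.abs(a, out=out)
print(out.values())    # tensor([1., 2.])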

test/test_sparse_csr.py

Lines changed: 58 additions & 32 deletions
@@ -363,6 +363,64 @@ def test_print(self, layout, device):
             self.maxDiff = orig_maxDiff
             raise

+    @skipMeta
+    @all_sparse_compressed_layouts()
+    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
+    def test_copy(self, layout, device, dtype):
+
+        def run_test(shape, nnz, index_type):
+            block_size = (2, 3) if layout in {torch.sparse_bsr, torch.sparse_bsc} else ()
+            a = self.genSparseCompressedTensor(shape, nnz, dtype=dtype, layout=layout, device=device,
+                                               index_dtype=index_dtype, block_size=block_size)
+            b = self.genSparseCompressedTensor(shape, nnz, dtype=dtype, layout=layout, device=device,
+                                               index_dtype=index_dtype, block_size=block_size)
+
+            a.copy_(b)
+
+            self.assertEqual(a, b)
+
+        ns = [5, 2, 0]
+        batch_shapes = [(), (2,), (2, 3)]
+        for (m, n, b), index_dtype in zip(itertools.product(ns, ns, batch_shapes), [torch.int32, torch.int64]):
+            run_test((*b, m, n), 0, index_dtype)
+            run_test((*b, m, n), m * n, index_dtype)
+
+    @skipMeta
+    @all_sparse_compressed_layouts()
+    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
+    def test_copy_errors(self, layout, device, dtype):
+        block_size = (2, 3) if layout in {torch.sparse_bsr, torch.sparse_bsc} else ()
+        for index_dtype in [torch.int32, torch.int64]:
+            shape1 = (2, 3)
+            a = self.genSparseCompressedTensor(shape1, 0, dtype=dtype, layout=layout, device=device,
+                                               index_dtype=index_dtype, block_size=block_size)
+
+            with self.assertRaisesRegex(RuntimeError,
+                                        "copy of sparse compressed tensors having different layouts is not supported."):
+                a.copy_(torch.empty(a.shape, dtype=dtype, device=device))
+
+            b = self.genSparseCompressedTensor(shape1, 1, dtype=dtype, layout=layout, device=device,
+                                               index_dtype=index_dtype, block_size=block_size)
+            with self.assertRaisesRegex(RuntimeError,
+                                        "only sparse compressed tensors with the same number of specified elements are supported."):
+                a.copy_(b)
+
+            shape2 = tuple(reversed(shape1))
+            c = self.genSparseCompressedTensor(shape2, 1, dtype=dtype, layout=layout, device=device,
+                                               index_dtype=index_dtype, block_size=block_size)
+            with self.assertRaisesRegex(
+                    RuntimeError,
+                    "expected shapes of self and src to match along dimension"):
+                b.copy_(c)
+
+            if block_size:
+                block_size1 = tuple(reversed(block_size))
+                d = self.genSparseCompressedTensor(shape1, 1, dtype=dtype, layout=layout, device=device,
+                                                   index_dtype=index_dtype, block_size=block_size1)
+                with self.assertRaisesRegex(RuntimeError,
+                                            "copy of sparse compressed tensors having different block sizes is not supported"):
+                    b.copy_(d)
+

 class TestSparseCSR(TestCase):


@@ -435,38 +493,6 @@ def test_sparse_csr_select(self, device, dtype):
         with self.assertRaisesRegex(TypeError, "Cannot assign to a sparse tensor"):
             sparse[0, 0, 0, 0] = 99.0

-    @skipMeta
-    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
-    def test_copy(self, device, dtype):
-
-        def run_test(shape, nnz, index_type):
-            a = self.genSparseCSRTensor(shape, nnz, dtype=dtype, device=device, index_dtype=index_dtype)
-            b = self.genSparseCSRTensor(shape, nnz, dtype=dtype, device=device, index_dtype=index_dtype)
-
-            a.copy_(b)
-
-            self.assertEqual(a, b)
-
-        ns = [5, 2, 0]
-        batch_shapes = [(), (2,), (2, 3)]
-        for (m, n, b), index_dtype in zip(itertools.product(ns, ns, batch_shapes), [torch.int32, torch.int64]):
-            run_test((*b, m, n), 0, index_dtype)
-            run_test((*b, m, n), m * n, index_dtype)
-
-    @skipMeta
-    @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
-    def test_copy_errors(self, device, dtype):
-        for index_dtype in [torch.int32, torch.int64]:
-            shape1 = (2, 3)
-            a = self.genSparseCSRTensor(shape1, 0, dtype=dtype, device=device, index_dtype=index_dtype)
-
-            with self.assertRaisesRegex(RuntimeError, "copy between different layouts is not supported."):
-                a.copy_(torch.empty(a.shape, dtype=dtype, device=device))
-
-            b = self.genSparseCSRTensor(shape1, 1, dtype=dtype, device=device, index_dtype=index_dtype)
-            with self.assertRaisesRegex(RuntimeError, "only tensors with the same number of specified elements are supported."):
-                a.copy_(b)
-
     @skipMeta
     @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
     def test_resize(self, device, dtype):
torch/testing/_internal/common_utils.py

Lines changed: 2 additions & 1 deletion
@@ -2061,7 +2061,8 @@ def random_sparse_compressed(n_compressed_dims, n_plain_dims, nnz):
         n_compressed_dims, n_plain_dims = size[-1], size[-2]
         sparse_tensors = [random_sparse_compressed(n_compressed_dims, n_plain_dims, nnz) for _ in range(n_batch)]
         sparse_tensors_it = map(list, zip(*sparse_tensors))
-        values = torch.stack(next(sparse_tensors_it)).reshape(*batch_shape, -1)
+
+        values = torch.stack(next(sparse_tensors_it)).reshape(*batch_shape, nnz, *block_size)
         compressed_indices = torch.stack(next(sparse_tensors_it)).reshape(*batch_shape, -1)
         plain_indices = torch.stack(next(sparse_tensors_it)).reshape(*batch_shape, -1)
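
The fixed reshape keeps the trailing block dimensions of the stacked per-batch values instead of flattening them into a single axis. A small sketch of the resulting shape, with hypothetical sizes standing in for the helper's batch_shape, nnz, and block_size locals:

import torch

batch_shape, nnz, block_size = (2, 3), 4, (2, 3)
n_batch = 2 * 3

# One values tensor per batch element, holding nnz blocks of shape block_size.
per_batch_values = [torch.randn(nnz, *block_size) for _ in range(n_batch)]

values = torch.stack(per_batch_values).reshape(*batch_shape, nnz, *block_size)
print(values.shape)  # torch.Size([2, 3, 4, 2, 3])

For non-blocked layouts block_size is empty, so the reshape reduces to (*batch_shape, nnz).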
