
Commit 572132f

weiyangfb authored and facebook-github-bot committed
copy_(Sparse, Sparse) for sparse tensor (#9005)
Summary:
- fix #8330
- add `torch.copy_(Sparse, Sparse)` with autograd support

Pull Request resolved: #9005
Differential Revision: D8987885
Pulled By: weiyangfb
fbshipit-source-id: b317a41da22ee1eae2835622a0ed28a6771a3a06
1 parent 93ecf4d commit 572132f

File tree

9 files changed: +90 -26 lines changed

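For orientation before the per-file diffs: a minimal Python sketch of the behavior this commit enables, mirroring the tests added in test/test_sparse.py below. The `torch.sparse_coo_tensor` constructor here stands in for the test suite's `_gen_sparse` helper and is an assumption about the caller's setup, not part of this change.

    import torch

    # Two sparse COO tensors with the same shape.
    i = torch.tensor([[0, 1, 1], [2, 0, 2]])
    v = torch.tensor([3.0, 4.0, 5.0])
    x1 = torch.sparse_coo_tensor(i, v, (2, 3))
    x2 = torch.sparse_coo_tensor(i, 2 * v, (2, 3))

    # Sparse -> sparse in-place copy, newly supported by this commit.
    x1.copy_(x2)
    assert torch.equal(x1.to_dense(), x2.to_dense())

    # The copy is differentiable with respect to the source tensor.
    x2.requires_grad_(True)
    x1.copy_(x2)
    y = x1 * 2
    y.backward(x2.detach().clone())   # gradient flows back to x2, not to x1
    assert x2.grad is not None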

aten/src/ATen/native/native_functions.yaml

Lines changed: 2 additions & 2 deletions
@@ -2068,8 +2068,8 @@
     SparseCPU: hspmm_sparse_cpu
     SparseCUDA: hspmm_sparse_cuda
 
-# This "raw copy" doesn't handle conversions NOR does it handle non-blocking.
-- func: raw_copy_sparse_(Tensor self, Tensor src) -> Tensor
+- func: copy_sparse_to_sparse_(Tensor self, Tensor src, bool non_blocking=false) -> Tensor
+  variants: function
   dispatch:
     SparseCPU: copy_sparse_
     SparseCUDA: copy_sparse_

aten/src/ATen/native/sparse/SparseTensor.cpp

Lines changed: 3 additions & 3 deletions
@@ -204,7 +204,7 @@ SparseTensor new_with_tensor_and_size_sparse(const LongTensor& indices, const Te
 
 SparseTensor clone_sparse(const SparseTensor& self) {
   SparseTensor other = new_with_dims_and_size_sparse(self.type(), self._sparseDims(), self._denseDims(), self.sizes());
-  _copy_into_sparse(other, _get_sparse_impl(self)->indices(), _get_sparse_impl(self)->values());
+  _copy_into_sparse(other, _get_sparse_impl(self)->indices(), _get_sparse_impl(self)->values(), true);
   _get_sparse_impl(other)->set_coalesced(self.is_coalesced());
   return other;
 }
@@ -243,11 +243,11 @@ Tensor sparse_to_dense(const SparseTensor& self) {
   return dst.add_(self);
 }
 
-SparseTensor& copy_sparse_(SparseTensor& self, const SparseTensor& src) {
+SparseTensor& copy_sparse_(SparseTensor& self, const SparseTensor& src, bool non_blocking) {
   if (isSameTensor(self, src)) return self;
   _get_sparse_impl(self)->resize_(src._sparseDims(), src._denseDims(), src.sizes());
   // NB: This seems to copy the underlying full indices/values buffer
-  _copy_into_sparse(self, _get_sparse_impl(src)->indices(), _get_sparse_impl(src)->values());
+  _copy_into_sparse(self, _get_sparse_impl(src)->indices(), _get_sparse_impl(src)->values(), non_blocking);
   _get_sparse_impl(self)->set_coalesced(src.is_coalesced());
   return self;
 }
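In effect, `copy_sparse_` now resizes `self` to `src`'s sparse/dense dimensions and sizes, deep-copies the indices and values (converting them into `self`'s types; see the SparseUtils.h hunk below), and carries over the coalesced flag. A rough Python model of these semantics; `copy_sparse_model` is a hypothetical helper sketching the behavior, not the actual C++ implementation:

    import torch

    def copy_sparse_model(dst, src):
        # Hypothetical model only: take src's shape, indices, and values,
        # but convert values into dst's dtype/device, since the destination
        # type is what _copy_into_sparse now copies into.
        # (non_blocking is dropped in this sketch.)
        indices = src._indices().to(dst.device)
        values = src._values().to(device=dst.device, dtype=dst.dtype)
        out = torch.sparse_coo_tensor(indices, values, src.shape, device=dst.device)
        # Mirror the coalesced-flag carry-over.
        return out.coalesce() if src.is_coalesced() else out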

aten/src/ATen/native/sparse/SparseTensorMath.cpp

Lines changed: 2 additions & 2 deletions
@@ -98,7 +98,7 @@ SparseTensor& log1p_out_sparse(SparseTensor& r, const SparseTensor& t) {
       r.is_coalesced(), "log1p: in-place on uncoalesced tensors is not supported yet!");
   }
   else {
-    r = raw_copy_sparse_(r, t.coalesce());
+    copy_sparse_to_sparse_(r, t.coalesce());
   }
   r._values().log1p_();
   return r;
@@ -192,7 +192,7 @@ SparseTensor& add_out_sparse_cpu(SparseTensor& r, const SparseTensor& t, const S
   AT_CHECK(t.sizes().equals(src.sizes()), "add: expected sizes of 'self' and 'other' to match, but ", t.sizes(), " != ", src.sizes());
 
   if (src._nnz() == 0) {
-    return raw_copy_sparse_(r, t);
+    return copy_sparse_to_sparse_(r, t);
   }
   if (t._nnz() == 0) {
     return mul_out_sparse_scalar(r, src, value);

aten/src/ATen/native/sparse/SparseUtils.h

Lines changed: 2 additions & 2 deletions
@@ -50,8 +50,8 @@ inline void _alias_into_sparse(const SparseTensor& self, const LongTensor& indic
 
 // Take indices and values and makes a (data) copy of them to put into the sparse
 // indices/values. This used to be called THSTensor_(_set)
-inline void _copy_into_sparse(const SparseTensor& self, const LongTensor& indices, const Tensor& values) {
-  _alias_into_sparse(self, indices.clone(), values.clone());
+inline void _copy_into_sparse(const SparseTensor& self, const LongTensor& indices, const Tensor& values, bool non_blocking) {
+  _alias_into_sparse(self, self._indices().type().copy(indices, non_blocking), self._values().type().copy(values, non_blocking));
 }
 
 // Does NOT make copies of indices/values
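The old body cloned `indices` and `values` as-is, so a copy silently took on the source's types; routing through `self._indices().type().copy(...)` converts into the destination's index and value types instead. The observable effect, mirroring the dtype test added in test/test_sparse.py below:

    import torch

    i = torch.tensor([[0, 1], [1, 0]])
    x1 = torch.sparse_coo_tensor(i, torch.tensor([1.0, 2.0]), (2, 2))   # float32
    x2 = torch.sparse_coo_tensor(i, torch.tensor([3.0, 4.0], dtype=torch.float64), (2, 2))

    x1.copy_(x2)
    assert x1.dtype == torch.float32   # destination dtype is preserved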

aten/src/ATen/native/sparse/cuda/SparseCUDATensorMath.cu

Lines changed: 1 addition & 1 deletion
@@ -363,7 +363,7 @@ SparseTensor& add_out_sparse_cuda(SparseTensor& r_, const SparseTensor& t, const
   AT_CHECK(t.sizes().equals(src.sizes()), "add: expected 'self' and 'other' to have same size, but ", t.sizes(), " != ", src.sizes());
 
   if (src._nnz() == 0) {
-    return raw_copy_sparse_(r_, t);
+    return copy_sparse_to_sparse_(r_, t);
   }
   if (t._nnz() == 0) {
     return mul_out_sparse_scalar(r_, src, value);

aten/src/ATen/templates/TypeDefault.cpp

Lines changed: 7 additions & 14 deletions
@@ -18,7 +18,8 @@ namespace at {
 
 Tensor & TypeDefault::copy_(Tensor & self, const Tensor & src, bool non_blocking) const {
   Tensor b_src;
-  std::tie(b_src) = expand_inplace(self, src, "copy");
+  if (is_sparse()) b_src = src;
+  else std::tie(b_src) = expand_inplace(self, src, "copy");
   return s_copy_(self, b_src, non_blocking);
 }
 
@@ -28,19 +29,11 @@ Tensor TypeDefault::copy(const Tensor & src, bool non_blocking, optional<Device>
     device_guard.set_index(to_device.value().index());
   }
   AT_CHECK(src.defined(), "attempt to copy an undefined tensor");
-  if (is_sparse()) {
-    auto indices = src._indices();
-    auto values = src._values();
-    auto & this_dense = toBackend(is_cuda() ? Backend::CUDA : Backend::CPU);
-    auto & this_dense_idx = this_dense.toScalarType(ScalarType::Long);
-    auto indices_copy = this_dense_idx.copy(indices, non_blocking);
-    auto values_copy = this_dense.copy(values, non_blocking);
-    return _sparse_coo_tensor_unsafe(indices_copy, values_copy, src.sizes());
-  } else {
-    Tensor r = this->tensor(src.sizes());
-    r.copy_(src, non_blocking);
-    return r;
-  }
+  Tensor r;
+  if (is_sparse()) r = this->native_tensor();
+  else r = this->tensor(src.sizes());
+  r.copy_(src, non_blocking);
+  return r;
 }
 
 void TypeDefault::backward(Tensor & self, at::optional<Tensor> gradient, bool keep_graph, bool create_graph) const {
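This hunk removes the special-cased sparse branch from out-of-place `copy`: both dense and sparse now allocate an empty destination (`this->native_tensor()` for sparse) and defer to `copy_`. Out-of-place conversions such as `Tensor.double()` ride on this path; a small illustration under that assumption:

    import torch

    i = torch.tensor([[0, 1], [1, 0]])
    x = torch.sparse_coo_tensor(i, torch.tensor([1.0, 2.0]), (2, 2))

    y = x.double()   # out-of-place copy/conversion returns a new sparse tensor
    assert y.is_sparse and y.dtype == torch.float64
    assert x.dtype == torch.float32   # the source is left untouched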

test/test_sparse.py

Lines changed: 68 additions & 0 deletions
@@ -496,6 +496,74 @@ def test_shape(sparse_dims, nnz, with_size):
         test_shape(3, 10, [100, 100, 100, 5, 5, 5, 0])
         test_shape(3, 0, [0, 0, 100, 5, 5, 5, 0])
 
+    def test_Sparse_to_Sparse_copy_(self):
+        # This is for testing torch.copy_(SparseTensor, SparseTensor)
+        sparse_dims = 3
+        nnz = 10
+        sizes = [2, 3, 4, 5]  # hybrid sparse
+        x1, _, _ = self._gen_sparse(sparse_dims, nnz, sizes)
+        x2, _, _ = self._gen_sparse(sparse_dims, nnz + 10, sizes)
+
+        # test copy
+        x2_dense = x2.to_dense()
+        x1.copy_(x2)
+        self.assertEqual(x2_dense, x1.to_dense())
+
+        # test type conversion (when x1.copy_(x2), x1.dtype should stay the same)
+        x1 = x1.to(torch.float32)
+        x2 = x2.to(torch.float64)
+        x1_dtype = x1.dtype
+        x1.copy_(x2)
+        self.assertEqual(x1_dtype, x1.dtype)
+
+        # test no broadcast
+        self.assertRaises(RuntimeError, lambda: x1.copy_(x2.narrow_copy(0, 0, 1)))
+
+        # test raise error on copy_() between dense and sparse Tensors
+        self.assertRaises(RuntimeError, lambda: x1.copy_(torch.randn(5, 5)))
+
+        # test autograd
+        x1, _, _ = self._gen_sparse(sparse_dims, nnz, sizes)
+        x2, _, _ = self._gen_sparse(sparse_dims, nnz + 10, sizes)
+        x2.requires_grad_(True)
+        x1.copy_(x2)
+        y = x1 * 2
+        x2_clone = x2.clone()
+        y.backward(x2_clone)
+        expected_grad = x2_clone * 2
+        self.assertEqual(expected_grad.to_dense(), x2.grad.to_dense())
+        self.assertEqual(None, x1.grad)
+
+    @unittest.skipIf(torch.cuda.device_count() < 2, "no multi-GPU")
+    def test_Sparse_to_Sparse_copy_multi_gpu(self):
+        # This is for testing torch.copy_(SparseTensor, SparseTensor) across GPU devices
+        sparse_dims = 3
+        nnz = 10
+        sizes = [2, 3, 4, 5]  # hybrid sparse
+        x1, _, _ = self._gen_sparse(sparse_dims, nnz, sizes)
+        x2, _, _ = self._gen_sparse(sparse_dims, nnz + 10, sizes)
+        x1 = x1.to('cuda:0')
+
+        def test_cross_device(x1, x2):
+            x1_device = x1.device
+            x1.copy_(x2)
+            self.assertEqual(x2.to('cuda:0').to_dense(), x1.to_dense())
+            self.assertEqual(x1_device, x1.device)
+
+        test_cross_device(x1, x2.to('cuda:1'))  # test across gpu devices
+        test_cross_device(x1, x2.to('cpu'))  # test between cpu and gpu
+
+        # test autograd
+        x2 = x2.to('cuda:1')
+        x2.requires_grad_(True)
+        x1.copy_(x2)
+        y = x1 * 2
+        x2_clone = x2.clone().to('cuda:0')
+        y.backward(x2_clone)
+        expected_grad = x2_clone * 2
+        self.assertEqual(expected_grad.to_dense(), x2.grad.to('cuda:0').to_dense())
+        self.assertEqual(None, x1.grad)
+
     @cuda_only
     def test_cuda_empty(self):
         def test_tensor(x):

tools/autograd/gen_python_functions.py

Lines changed: 2 additions & 1 deletion
@@ -28,7 +28,8 @@
     '_cumsum.*', '_cumprod.*', '_sum.*', '_prod.*', '_th_.*',
     'arange.*', 'range.*', '_gesv.*', '_getri.*', 'slice', 'randint(_out)?',
     '_local_scalar', '_local_scalar_dense',
-    'max_pool1d', 'max_pool2d', 'max_pool3d', 'linear', 'to'
+    'max_pool1d', 'max_pool2d', 'max_pool3d', 'linear', 'to',
+    'copy_sparse_to_sparse_'
 ]
 
 # These function signatures are not exposed to Python. Note that this signature

tools/autograd/templates/VariableType.cpp

Lines changed: 3 additions & 1 deletion
@@ -416,7 +416,9 @@ Tensor & VariableType::s_copy_(Tensor & self, const Tensor & src, bool non_block
       grad_fn->src_device = src.get_device();
     }
   }
-  baseType->s_copy_(self_, src_, non_blocking);
+  if (self.is_sparse() && src.is_sparse()) baseType->copy_sparse_to_sparse_(self_, src_, non_blocking);
+  else if (!self.is_sparse() && !src.is_sparse()) baseType->s_copy_(self_, src_, non_blocking);
+  else AT_ERROR("copy_() between dense and sparse Tensors is not implemented! Found self type = ", self.type(), " and src type = ", src.type());
   increment_version(self);
   rebase_history(as_variable_ref( self ), std::move(grad_fn));
   if(torch::jit::tracer::isTracing()) {
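The dispatch is now three-way: sparse-to-sparse goes to the new `copy_sparse_to_sparse_`, dense-to-dense keeps the old `s_copy_` path, and any dense/sparse mix raises. Mirroring the error-path test added in test/test_sparse.py above:

    import torch

    i = torch.tensor([[0, 1], [1, 0]])
    x = torch.sparse_coo_tensor(i, torch.tensor([1.0, 2.0]), (2, 2))

    try:
        x.copy_(torch.randn(2, 2))   # sparse <- dense is rejected
    except RuntimeError as e:
        print(e)   # "copy_() between dense and sparse Tensors is not implemented! ..."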
