
Commit 13abcd1

Check for internal memory overlap in some indexing-type functions
ghstack-source-id: 2e93fdc
Pull Request resolved: #43423
1 parent b59e1f4 commit 13abcd1
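
What the commit guards against: "internal overlap" means a tensor whose strides make several indices alias the same memory location, most commonly the result of expand(). Writing into such a tensor through an in-place or out= indexing op is nondeterministic, so the functions touched here now call at::assert_no_internal_overlap on the written-to tensor and raise the existing "unsupported operation" error instead. A minimal repro in the spirit of the new tests below (device and shapes are illustrative):

    import torch

    x = torch.rand((1,)).expand((6,))   # six indices alias one element -> internal overlap
    ind = torch.tensor([0, 2, 3])
    value = torch.rand((3,))

    try:
        x.index_put_((ind,), value)     # rejected after this change
    except RuntimeError as e:
        print(e)                        # message contains "unsupported operation"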


9 files changed: 78 additions, 4 deletions

aten/src/ATen/native/LegacyDefinitions.cpp

Lines changed: 2 additions & 0 deletions
@@ -3,12 +3,14 @@
 #include <ATen/LegacyTHFunctionsCPU.h>
 #include <ATen/NamedTensorUtils.h>
 #include <ATen/ExpandUtils.h>
+#include <ATen/MemoryOverlap.h>
 
 namespace at { namespace native {
 
 // Methods
 
 Tensor & masked_scatter__cpu(Tensor& self, const Tensor & mask, const Tensor & source) {
+  at::assert_no_internal_overlap(self);
   Tensor b_mask;
   std::tie(b_mask) = expand_inplace(self, mask, "masked_scatter_");
   // As we dispatch on self and TH is type-checked, we need different definitions.

aten/src/ATen/native/TensorAdvancedIndexing.cpp

Lines changed: 4 additions & 0 deletions
@@ -272,6 +272,7 @@ Tensor index(const Tensor & self, TensorList indices) {
 
 Tensor& index_out(Tensor& result, const Tensor & self, TensorList indices) {
   TORCH_CHECK_INDEX(indices.size() <= (size_t)self.dim(), "too many indices for tensor of dimension ", self.dim(), " (got ", indices.size(), ")");
+  at::assert_no_internal_overlap(result);
 
   auto info = make_info(self, indices);
   auto iter = make_index_out_iterator(info, result);
@@ -291,6 +292,7 @@ Tensor & _index_put_impl_(Tensor & self, TensorList indices, const Tensor & valu
     index_put_accum_stub(self.device().type(), self, indices, value, unsafe);
     return self;
   }
+  at::assert_no_internal_overlap(self);
   auto info = make_info(self, indices);
   auto iter = make_index_put_iterator(info, value);
   index_put_stub(iter.device_type(), iter, info.indexed_sizes, info.indexed_strides, accumulate);
@@ -429,6 +431,7 @@ Tensor & index_select_out_cpu_(Tensor & result, const Tensor & self, int64_t dim
               "index_select(): self and result must have the same scalar type");
   TORCH_CHECK(dim == 0 || dim < self.dim(),
               "index_select(): Indexing dim ", dim, " is out of bounds of tensor");
+  at::assert_no_internal_overlap(result);
 
   auto result_size = self.sizes().vec();
   if (self.dim() > 0) {
@@ -698,6 +701,7 @@ static Tensor & masked_select_out_impl_cpu(Tensor & result, const Tensor & self,
               "masked_select: expected BoolTensor or ByteTensor for mask");
   TORCH_CHECK(self.scalar_type() == result.scalar_type(),
               "masked_select(): self and result must have the same scalar type");
+  at::assert_no_internal_overlap(result);
 
   at::assert_no_internal_overlap(result);
   at::assert_no_partial_overlap(result, self);
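
A note on the check itself: at::assert_no_internal_overlap (declared in ATen/MemoryOverlap.h, included above) only throws when it can prove the destination overlaps with itself; layouts it cannot classify are let through. A rough, non-authoritative Python sketch of that heuristic, not the actual implementation (which is the C++ in ATen/MemoryOverlap.cpp):

    def has_internal_overlap(t):
        # A contiguous tensor can never alias itself.
        if t.is_contiguous():
            return False
        # A zero stride over a dimension of size > 1 always does
        # (e.g. the result of expand()). Anything else is treated
        # as "too hard" and conservatively allowed.
        return any(stride == 0 and size > 1
                   for stride, size in zip(t.stride(), t.size()))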

aten/src/ATen/native/cuda/Indexing.cu

Lines changed: 1 addition & 0 deletions
@@ -813,6 +813,7 @@ Tensor& index_select_out_cuda(Tensor& out, const Tensor& self, int64_t dim,
 
   TORCH_CHECK(at::cuda::check_device({out, self, index}),
               "Input, output and indices must be on the current device");
+  at::assert_no_internal_overlap(out);
 
   dim = at::maybe_wrap_dim(dim, self);
   TORCH_CHECK(self.dim() <= MAX_TENSORINFO_DIMS, DIM_WARNING);

aten/src/ATen/native/cuda/LegacyDefinitions.cpp

Lines changed: 3 additions & 0 deletions
@@ -11,6 +11,7 @@ namespace at { namespace native {
 
 Tensor & masked_fill__cuda(Tensor& self, const Tensor & mask, Scalar value) {
   auto maybe_outnames = namedinference::broadcast_to_outnames(self, mask, "masked_fill_");
+  at::assert_no_internal_overlap(self);
   Tensor b_mask;
   std::tie(b_mask) = expand_inplace(self, mask, "masked_fill_");
   // As we dispatch on self and TH is type-checked, we need different definitions.
@@ -28,6 +29,7 @@ Tensor & masked_fill__cuda(Tensor& self, const Tensor & mask, Scalar value) {
 
 Tensor & masked_fill__cuda(Tensor& self, const Tensor & mask, const Tensor & value) {
   auto maybe_outnames = namedinference::broadcast_to_outnames(self, mask, "masked_fill_");
+  at::assert_no_internal_overlap(self);
 
   TORCH_CHECK(value.dim() == 0, "masked_fill_ only supports a 0-dimensional value tensor, but got tensor "
       "with ", value.dim(), " dimension(s).");
@@ -47,6 +49,7 @@ Tensor & masked_fill__cuda(Tensor& self, const Tensor & mask, const Tensor & val
 }
 
 Tensor & masked_scatter__cuda(Tensor& self, const Tensor & mask, const Tensor & source) {
+  at::assert_no_internal_overlap(self);
   Tensor b_mask;
   std::tie(b_mask) = expand_inplace(self, mask, "masked_scatter_");
   // As we dispatch on self and TH is type-checked, we need different definitions.

aten/src/ATen/native/cuda/Shape.cu

Lines changed: 1 addition & 0 deletions
@@ -314,6 +314,7 @@ Tensor& cat_out_cuda(Tensor& out, TensorList inputs, int64_t dimension) {
                 "of the output memory locations. Found overlap in input "
                 "tensor ", i);
   }
+  at::assert_no_internal_overlap(out);
 
   for (int i = 0; i < inputs.size(); i++)
   {

test/test_distributions.py

Lines changed: 2 additions & 2 deletions
@@ -4926,8 +4926,8 @@ def f(*values):
 
             # check on different data
             values, sample = self._perturb(Dist, keys, values, sample)
-            expected = f(*values)
-            actual = traced_f(*values)
+            expected = f(*values).clone()
+            actual = traced_f(*values).clone()
             expected[expected == float('inf')] = 0.
             actual[actual == float('inf')] = 0.
             self.assertEqual(expected, actual,

test/test_torch.py

Lines changed: 61 additions & 0 deletions
@@ -14414,6 +14414,67 @@ def test_bernoulli_mem_overlap(self, device):
         with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
             torch.bernoulli(torch.rand_like(x), out=x)
 
+    def test_index_put_mem_overlap(self, device):
+        x = torch.rand((1,), device=device).expand((6,))
+        y = torch.rand((6,), device=device)
+        ind = torch.tensor([0, 2, 3], device=device)
+        value = torch.rand((3,), device=device)
+        with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
+            x.index_put_((ind,), value)
+
+    def test_masked_fill_mem_overlap(self, device):
+        x = torch.rand((1,), device=device).expand((6,))
+        mask = torch.tensor([True, False, True, True, False, False], device=device)
+        with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
+            x.masked_fill_(mask, 0.)
+
+        fill_val = torch.tensor(0., device=device)
+        with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
+            x.masked_fill_(mask, fill_val)
+
+    def test_masked_select_mem_overlap(self, device):
+        x = torch.rand((1,), device=device).expand((3,))
+        y = torch.rand((6,), device=device)
+        mask = torch.tensor([True, False, True, True, False, False], device=device)
+        with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
+            torch.masked_select(y, mask, out=x)
+
+    def test_masked_scatter_mem_overlap(self, device):
+        x = torch.rand((1,), device=device).expand((6,))
+        src = torch.rand((3,), device=device)
+        mask = torch.tensor([True, False, True, True, False, False], device=device)
+
+        with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
+            x.masked_scatter_(mask, src)
+
+    def test_index_select_mem_overlap(self, device):
+        x = torch.rand((1, 6), device=device).expand((2, 6))
+        y = torch.rand((3, 6), device=device)
+        ind = torch.tensor([0, 1], dtype=torch.int64, device=device)
+        with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
+            torch.index_select(y, 1, ind, out=x)
+
+    def test_cat_mem_overlap(self, device):
+        x = torch.rand((1, 3), device=device).expand((6, 3))
+        y = torch.rand((3, 3), device=device)
+        with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
+            torch.cat([y, y], out=x)
+
+    def test_scatter_mem_overlap(self, device):
+        x = torch.rand((1,), device=device).expand((6,))
+        src = torch.rand((3,), device=device)
+        ind = torch.tensor([0, 2, 3], device=device, dtype=torch.int64)
+
+        with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
+            x.scatter_(0, ind, src)
+
+    def test_gather_mem_overlap(self, device):
+        x = torch.rand((1,), device=device).expand((3,))
+        src = torch.rand((6,), device=device)
+        ind = torch.tensor([0, 2, 3], device=device, dtype=torch.int64)
+        with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
+            torch.gather(src, 0, ind, out=x)
+
     def test_linlogspace_mem_overlap(self, device):
         x = torch.rand(1, device=device).expand(10)
         with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):

torch/distributions/geometric.py

Lines changed: 2 additions & 1 deletion
@@ -88,7 +88,8 @@ def sample(self, sample_shape=torch.Size()):
     def log_prob(self, value):
         if self._validate_args:
             self._validate_sample(value)
-        value, probs = broadcast_all(value, self.probs.clone(memory_format=torch.contiguous_format))
+        value, probs = broadcast_all(value, self.probs)
+        probs = probs.clone(memory_format=torch.contiguous_format)
         probs[(probs == 1) & (value == 0)] = 0
         return value * (-probs).log1p() + self.probs.log()

torch/distributions/multinomial.py

Lines changed: 2 additions & 1 deletion
@@ -101,7 +101,8 @@ def sample(self, sample_shape=torch.Size()):
     def log_prob(self, value):
         if self._validate_args:
             self._validate_sample(value)
-        logits, value = broadcast_all(self.logits.clone(memory_format=torch.contiguous_format), value)
+        logits, value = broadcast_all(self.logits, value)
+        logits = logits.clone(memory_format=torch.contiguous_format)
         log_factorial_n = torch.lgamma(value.sum(-1) + 1)
         log_factorial_xs = torch.lgamma(value + 1).sum(-1)
         logits[(value == 0) & (logits == -inf)] = 0
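
Why the two distributions changes (and the test_distributions.py clone() above): broadcast_all returns expanded views, so cloning the parameter before broadcasting still leaves the broadcast result with zero strides, and the in-place masked assignment that follows would now trip the index_put_ overlap check. Cloning after broadcasting materializes the memory first. A small sketch of the fixed pattern, with made-up values for illustration:

    import torch
    from torch.distributions.utils import broadcast_all

    probs = torch.tensor(0.5)                    # scalar parameter
    value = torch.zeros(4)
    value, probs = broadcast_all(value, probs)   # probs is now an expanded view (stride 0)
    probs = probs.clone(memory_format=torch.contiguous_format)  # materialize before writing
    probs[(probs == 1) & (value == 0)] = 0       # safe: no internal overlap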
