8 changes: 0 additions & 8 deletions aten/src/ATen/Declarations.cwrap
@@ -3754,11 +3754,7 @@
- THSize* size
- THStride* stride
- arg: int64_t storage_offset
default: -1
aten_custom_call: |
if (storage_offset == -1) {
storage_offset = self_->tensor->storageOffset;
}
${THTensor}_setStorage(${state,}result_->tensor, self_->tensor->storage, storage_offset, size_, stride_);
result_->maybeScalar(size.size() == 0);
]]
@@ -3773,11 +3769,7 @@
- THSize* size
- THStride* stride
- arg: int64_t storage_offset
default: -1
aten_custom_call: |
if (storage_offset == -1) {
storage_offset = self_->tensor->storageOffset;
}
${THTensor}_setStorage(${state,}self_->tensor, self_->tensor->storage, storage_offset, size_, stride_);
self_->maybeScalar(size.size() == 0);
]]
10 changes: 9 additions & 1 deletion aten/src/ATen/native/TensorShape.cpp
@@ -112,6 +112,14 @@ Tensor expand_as(const Tensor& self, const Tensor& other) {
return self.expand(other.sizes());
}

Tensor as_strided(const Tensor& self, IntList size, IntList stride) {
return self.as_strided(size, stride, self.storage_offset());
}

Tensor &as_strided_(Tensor& self, IntList size, IntList stride) {
return self.as_strided_(size, stride, self.storage_offset());
}

Tensor narrow(const Tensor& self, int64_t dim, int64_t start, int64_t length) {
AT_CHECK(self.dim() > 0, "narrow() cannot be applied to a 0-dim tensor.");
auto cur_size = self.size(dim);
@@ -125,7 +133,7 @@ Tensor narrow(const Tensor& self, int64_t dim, int64_t start, int64_t length) {
#endif
AT_ERROR("start (", start, ") + length (", length, ") exceeds dimension size (", cur_size, ").");
}
return at::native::slice(self, dim, start, start + length, 1);
return at::slice(self, dim, start, start + length, 1);
}

Tensor permute(const Tensor& self, IntList dims) {
5 changes: 5 additions & 0 deletions aten/src/ATen/native/native_functions.yaml
@@ -146,6 +146,11 @@
- func: argmin(Tensor self) -> Tensor
- func: _argmin(Tensor self, int64_t dim, bool keepdim=false) -> Tensor

# The actual implementations live in Declarations.cwrap. These are just to
# provide default values for storage_offset=self.storage_offset()
- func: as_strided(Tensor self, IntList size, IntList stride) -> Tensor

- func: as_strided_(Tensor self, IntList size, IntList stride) -> Tensor
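The comment above notes that these native entries exist only to default storage_offset to self.storage_offset(). A minimal sketch of the resulting Python-level behavior, assuming the current tensor API (the names `base` and `view` are purely illustrative):

```python
import torch

base = torch.arange(25.).view(5, 5)
view = base[1:]  # shares storage with `base`; view.storage_offset() == 5

# Two-argument form: the offset defaults to the view's own storage offset,
# so the window stays anchored where `view` starts in the shared storage.
a = view.as_strided([3, 3], [5, 1])

# Equivalent explicit form, spelling out the default described above.
b = view.as_strided([3, 3], [5, 1], view.storage_offset())

assert torch.equal(a, b)
assert a.storage_offset() == view.storage_offset() == 5
```

The three-argument form remains available for callers that want to anchor the window elsewhere in the shared storage.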

- func: asin(Tensor self) -> Tensor

- func: asin_(Tensor self) -> Tensor
43 changes: 36 additions & 7 deletions test/test_autograd.py
@@ -2053,13 +2053,42 @@ def test_dir(self):
self.assertTrue(hasattr(x, key))

def test_as_strided(self):
x = Variable(torch.arange(0., 25).view(5, 5), requires_grad=True)

def as_strided(x):
return x.as_strided([3, 3], [6, 2], 2)
def test(x, repro_fn, *args):
def closure(x):
if repro_fn is not None:
x = repro_fn(x)
return x.as_strided(*args)

gradcheck(as_strided, [x], raise_exception=True)
gradgradcheck(as_strided, [x], [torch.randn(3, 3)])
x = x.to(torch.double).detach().requires_grad_()
gradcheck(closure, [x])
gradgradcheck(closure, [x])

# test basic case
test(torch.arange(0, 25), lambda x: x.view(5, 5), [3, 3], [6, 2], 2)

# test crazy stride at dim with size 1 case
test(torch.randn(10), None, [1, 2, 1, 5], [0, 5, 100, 1], 2)

# test expand case
test(torch.randn(5), None, [3, 3, 3], [0, 1, 0], 2)
test(torch.randn(5), None, [3, 3, 3], [0, 0, 0], 4)
test(torch.randn(5), lambda x: x.expand(5, 5), [5, 5], [0, 1], 0)

# test non-expand overlapping case
test(torch.randn(35), None, [6, 6], [5, 1], 2)
test(torch.randn(15), None, [3, 2], [3, 6], 2)

# test transpose case
test(torch.randn(3, 4), None, [4, 3], [1, 4])

# test "getting things outside the input" case
x = torch.randn(6, 2)
test(x[3:], None, [3, 2], [2, 1], 0) # should be all zeros
self.assertEqual(x[3:].as_strided([3, 2], [2, 1], 0), x[:3])

# test select on expanded input case
test(torch.randn(2, 3), lambda x: x.expand(10, 2, 3), [2, 3], [3, 1], 0)
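To make the "getting things outside the input" case in the test above concrete, here is a hedged sketch (assuming the backward semantics this test exercises; the names `tail` and `out` are mine): when the strided window only touches storage that lies before the input's own elements, the input's gradient comes back as all zeros.

```python
import torch

x = torch.randn(6, 2, dtype=torch.double)

# detach() keeps the storage shared, so `tail` starts at storage offset 6
# while offsets 0..5 still hold the data of x[:3].
tail = x[3:].detach().requires_grad_()

# With storage_offset=0 this window reads x[:3]'s data, i.e. memory that lies
# entirely outside tail's own elements.
out = tail.as_strided([3, 2], [2, 1], 0)
out.sum().backward()

# None of tail's own elements influence `out`, so its gradient is all zeros.
print(tail.grad)
```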

def _test_where_functional(self, t):
x = Variable(t(torch.randn(5, 5)), requires_grad=True)
@@ -2334,13 +2363,13 @@ def backward(ctx, gO):

inp = torch.rand(size, requires_grad=True)
out = MyFunc.apply(inp, inp, True)
with self.assertRaisesRegexp(RuntimeError, "Function 'MyFuncBackward' returned nan values in its 0th output."):
with self.assertRaisesRegex(RuntimeError, "Function 'MyFuncBackward' returned nan values in its 0th output."):
with detect_anomaly():
out.backward()

inp = torch.rand(size, requires_grad=True)
out = MyFunc.apply(inp, inp, False)
with self.assertRaisesRegexp(RuntimeError, "Function 'MyFuncBackward' returned nan values in its 1th output."):
with self.assertRaisesRegex(RuntimeError, "Function 'MyFuncBackward' returned nan values in its 1th output."):
with detect_anomaly():
out.backward()

65 changes: 52 additions & 13 deletions tools/autograd/derivatives.yaml
@@ -342,7 +342,7 @@
self: -at::mm(output.t(), at::mm(grad, output.t()))

- name: kthvalue(Tensor self, int64_t k, int64_t dim, bool keepdim)
self: select_backward(grad, dim, indices, self.sizes(), keepdim)
self: index_select_backward(grad, dim, indices, self.sizes(), keepdim)

- name: le_(Tensor self, Scalar other)
self: zeros_like(self)
@@ -407,10 +407,10 @@
self: zeros_like(self).masked_scatter_(mask, grad)

- name: max(Tensor self, int64_t dim, bool keepdim)
self: select_backward(grad, dim, max_indices, self.sizes(), keepdim)
self: index_select_backward(grad, dim, max_indices, self.sizes(), keepdim)

- name: max(Tensor self)
self: select_backward_scalar(grad, self, result)
self: select_equals_backward(grad, self, result)

- name: max(Tensor self, Tensor other)
self: grad.clone().masked_fill_(self <= other, 0)
@@ -423,16 +423,30 @@
self: grad.expand(self.sizes()) / self.numel()

- name: median(Tensor self)
self: select_backward_scalar(grad, self, result)

self: select_equals_backward(grad, self, result)

# This is in theory incorrect in the following case:
#   sorted list: [..., a, b, b, ..., b, b, c, ...] with median = b and the value
#                                |                 at the middle position of the
#                                |                 list between two `b`s. E.g.,
#                                |
#                                ^the middle position
# The gradient exists and is essentially 0 in this case.
#
# In the case where the middle position is at the boundary of the `b` range, e.g.,
#   sorted list: [..., a, b, b, ..., b, b, c, ...]
#                         |
#                         ^the middle position
# The backward implementation is correct in the sense that it returns the
# subgradient on one side.
- name: median(Tensor self, int64_t dim, bool keepdim)
self: select_backward(grad, dim, indices, self.sizes(), keepdim)
self: index_select_backward(grad, dim, indices, self.sizes(), keepdim)
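An illustrative sketch of the one-sided subgradient described in the comment above, assuming the current torch API: with ties, the forward pass commits to one concrete index and the backward routes the whole gradient to that position only.

```python
import torch

x = torch.tensor([[1., 2., 2., 2., 3.]], requires_grad=True)
values, indices = x.median(dim=1)
values.sum().backward()

# The forward pass picked one index among the tied 2s, and the backward
# scatters the entire gradient there: a valid one-sided subgradient.
print(indices)  # e.g. tensor([2])
print(x.grad)   # e.g. tensor([[0., 0., 1., 0., 0.]])
```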

- name: min(Tensor self, int64_t dim, bool keepdim)
self: select_backward(grad, dim, min_indices, self.sizes(), keepdim)
self: index_select_backward(grad, dim, min_indices, self.sizes(), keepdim)

- name: min(Tensor self)
self: select_backward_scalar(grad, self, result)
self: select_equals_backward(grad, self, result)

- name: min(Tensor self, Tensor other)
self: grad.clone().masked_fill_(self >= other, 0)
@@ -443,7 +457,7 @@
mat2: mm_mat2_backward(grad, self, mat2.sizes(), mat2.strides(), 1)

- name: mode(Tensor self, int64_t dim, bool keepdim)
self: select_backward(grad, dim, indices, self.sizes(), keepdim)
self: index_select_backward(grad, dim, indices, self.sizes(), keepdim)

- name: mul(Tensor self, Scalar other)
self: grad * other
@@ -561,6 +575,13 @@
- name: repeat(Tensor self, IntList repeats)
self: repeat_backward(grad, self.dim(), repeats)

# DO NOT define a backward for reshape!
# reshape is special in that it sometimes returns a view, and sometimes not.
# Defining a backward will make codegen spit out the forward call as
# as_variable(baseType->reshape(self)),
# making it impossible (hard) to detect when it is actually a view.
# - name: reshape(Tensor self, IntList shape)
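A hedged illustration of the view-vs-copy point made above (a sketch against the current tensor API, not part of this diff): whether reshape returns a view or a fresh copy depends on the input's memory layout.

```python
import torch

base = torch.arange(6.).reshape(2, 3)   # contiguous

# Contiguous input: reshape can return a view that shares the same storage.
v = base.reshape(3, 2)
print(v.data_ptr() == base.data_ptr())   # True: no copy was made

# Non-contiguous input (a transpose): reshape must materialize a copy.
c = base.t().reshape(6)
print(c.data_ptr() == base.data_ptr())   # False: fresh storage
```

Because the generated wrapper would treat both outcomes identically, the view case could no longer be recognized as a view, which is why no derivatives.yaml entry is defined for reshape.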

- name: RoiPooling2d_forward(Tensor input, Tensor rois, int64_t pooledHeight, int64_t pooledWidth, double spatialScale)
input: RoiPooling2d_backward(input, rois, pooledHeight, pooledWidth, spatialScale, grad, result1)

@@ -581,6 +602,9 @@
self: grad
src: grad.gather(dim, index)

- name: select(Tensor self, int64_t dim, int64_t index)

self: slice_backward(grad.unsqueeze(dim), self.sizes(), dim, index, index + 1, 1)

- name: sigmoid(Tensor self)
self: _sigmoid_backward(grad, result)

@@ -593,11 +617,14 @@
- name: sinh(Tensor self)
self: grad * self.cosh()

- name: slice(Tensor self, int64_t dim, int64_t start, int64_t end, int64_t step)
self: slice_backward(grad, self.sizes(), dim, start, end, step)

- name: slogdet(Tensor self)
self: slogdet_backward(grads, self, result0, result1)

- name: sort(Tensor self, int64_t dim, bool descending)
self: select_backward(grad, dim, indices, self.sizes(), true)
self: index_select_backward(grad, dim, indices, self.sizes(), true)

- name: split(Tensor self, int64_t split_size, int64_t dim)
self: split_backward(grads, split_size, dim, self.sizes(), self.type())
@@ -614,6 +641,12 @@
- name: squeeze(Tensor self, int64_t dim)
self: unsqueeze_to(grad, dim, self.sizes())

- name: squeeze_(Tensor self)
self: unsqueeze_to(grad, self.sizes())

- name: squeeze_(Tensor self, int64_t dim)
self: unsqueeze_to(grad, dim, self.sizes())

- name: std(Tensor self, bool unbiased)
self: var_backward(grad / (result * 2), self, unbiased)

@@ -659,14 +692,17 @@
self: _tanh_backward(grad, result)

- name: topk(Tensor self, int64_t k, int64_t dim, bool largest, bool sorted)
self: select_backward(grad, dim, indices, self.sizes(), true)
self: index_select_backward(grad, dim, indices, self.sizes(), true)

- name: trace(Tensor self)
self: trace_backward(grad, self.sizes())

- name: transpose(Tensor self, int64_t dim0, int64_t dim1)
self: grad.transpose(dim0, dim1)

- name: transpose_(Tensor self, int64_t dim0, int64_t dim1)
self: grad.transpose(dim0, dim1)

- name: tril(Tensor self, int64_t diagonal)
self: grad.tril(diagonal)

@@ -689,19 +725,22 @@
self: not_implemented("_unique")

- name: _unsafe_view(Tensor self, IntList size)
self: grad.contiguous().view(self.sizes())
self: grad.reshape(self.sizes())

- name: unsqueeze(Tensor self, int64_t dim)
self: grad.squeeze(dim)

- name: unsqueeze_(Tensor self, int64_t dim)
self: grad.squeeze(dim)

- name: var(Tensor self, bool unbiased)
self: var_backward(grad, self, unbiased)

- name: var(Tensor self, int64_t dim, bool unbiased, bool keepdim)
self: var_backward(grad, self, dim, unbiased, keepdim)

- name: view(Tensor self, IntList size)
self: grad.contiguous().view(self.sizes())
self: grad.reshape(self.sizes())

- name: _s_where(Tensor condition, Tensor self, Tensor other)
self: where(condition, grad, zeros_like(grad))