Skip to content

Commit 8a888c4

Browse files
zou3519 authored and facebook-github-bot committed
Reimplement as_strided in ATen. (#13185)
Summary: This moves away from using tensor.set_(...) for as_strided, which went through TH and was weirdly slow/complicated. The new as_strided has a new invariant that it will never resize the storage to a larger size (the previous as_strided allowed that behavior but it seemed weird and none of our code relied on it.) This offers a small speedup on as_strided: it went from 1300ns to 1100ns although the benchmarks get a little noisy here. Also on the changelog is a quick fix to resize_ code to avoid unsigned underflow. I'll rewrite the resize_ zero dim logic in a future diff, it doesn't make sense the way it is written right now. Pull Request resolved: #13185 Reviewed By: ezyang Differential Revision: D12809160 Pulled By: zou3519 fbshipit-source-id: 3885df9d863baab2b2f8d8e2f8e2bfe660a49d85
1 parent 8c2d0c8 commit 8a888c4

File tree

5 files changed

+97
-5
lines changed

5 files changed

+97
-5
lines changed

aten/src/ATen/core/TensorImpl.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,6 +726,17 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target {
726726
storage_offset_ = storage_offset;
727727
}
728728

729+
/* Sets the storage of this tensor to be new_storage */
730+
void set_storage(const Storage& new_storage) {
731+
auto* new_storage_ = new_storage.unsafeGetStorageImpl();
732+
auto* old_storage_ = storage_.unsafeGetStorageImpl();
733+
AT_ASSERTM(old_storage_, "Tensor: invalid null storage");
734+
if (new_storage_ == old_storage_) {
735+
return;
736+
}
737+
storage_ = new_storage;
738+
}
739+
729740
/**
730741
* Like set_sizes_and_strides but assumes contiguous strides.
731742
*

aten/src/ATen/native/Resize.h

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,13 @@ inline TensorImpl* resize_impl_cpu_(
3030
return self;
3131
}
3232

33-
size_t storage_size = 1;
33+
int64_t storage_size = 1;
3434
if (stride) {
3535
self->set_sizes_and_strides(size, *stride);
3636
// NB: storage size can be different from numel.
3737
for (size_t dim = 0; dim < size.size(); ++dim) {
38+
// FIXME: Don't rely on storage_size being negative because this
39+
// may not be true for some edge cases.
3840
storage_size += (size[dim] - 1) * stride.value()[dim];
3941
}
4042
} else {
@@ -46,4 +48,63 @@ inline TensorImpl* resize_impl_cpu_(
4648
return self;
4749
}
4850

51+
// Number of storage elements a view with geometry (sizes, strides) spans:
// one element for the origin plus stride * (len - 1) per dimension.
// Returns 0 when any dimension is empty (such a tensor holds no elements).
static inline int64_t computeStorageSize(IntList sizes, IntList strides) {
  for (size_t d = 0; d < sizes.size(); ++d) {
    if (sizes[d] == 0) {
      return 0;
    }
  }
  int64_t span = 1;
  for (size_t d = 0; d < sizes.size(); ++d) {
    span += strides[d] * (sizes[d] - 1);
  }
  return span;
}
61+
62+
// Throws unless a view with geometry (size, stride) placed at storage_offset
// fits entirely inside new_storage.
static inline void checkInBoundsForStorage(
    IntList size,
    IntList stride,
    int64_t storage_offset,
    const Storage& new_storage) {
  const int64_t storage_size = computeStorageSize(size, stride);
  // NB: (a tensor with arbitrary 0 dims)'s storage can have any numel,
  // so there is nothing to validate.
  if (storage_size == 0) {
    return;
  }
  const int64_t new_storage_size = new_storage.numel();
  AT_CHECK(
      storage_offset + storage_size <= new_storage_size,
      "setStorage: sizes ", size, ", strides ", stride, ","
      " and storage offset ", storage_offset,
      " requiring a storage size of ", storage_size + storage_offset,
      " are out of bounds for storage with numel ", new_storage_size);
}
80+
81+
/**
82+
* Set self's storage to be new_storage with sizes, strides, and storage_offset.
83+
* (size, stride, storage_offset) must be in bounds for the new storage.
84+
*/
85+
inline void setStorage(
86+
const Tensor& self,
87+
const Storage& new_storage,
88+
int64_t storage_offset,
89+
IntList size,
90+
IntList stride) {
91+
checkInBoundsForStorage(size, stride, storage_offset, new_storage);
92+
93+
auto* self_ = self.unsafeGetTensorImpl();
94+
95+
/* storage */
96+
self_->set_storage(new_storage);
97+
98+
/* storage offset */
99+
AT_CHECK(storage_offset >= 0, "Tensor: invalid storage offset ", storage_offset);
100+
self_->set_storage_offset(storage_offset);
101+
102+
/* size and stride */
103+
AT_ASSERT(size.size() == stride.size());
104+
if (self_->sizes() == size && self_->strides() == stride) {
105+
return;
106+
}
107+
self_->set_sizes_and_strides(size, stride);
108+
}
109+
49110
}}

aten/src/ATen/native/TensorShape.cpp

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "ATen/WrapDimUtils.h"
99
#include "c10/util/Exception.h"
1010
#include "c10/util/Optional.h"
11+
#include "ATen/native/Resize.h"
1112
#include <ATen/SparseTensorUtils.h>
1213
#include <algorithm>
1314
#include <vector>
@@ -150,19 +151,32 @@ Tensor expand_as(const Tensor& self, const Tensor& other) {
150151
}
151152

152153
Tensor as_strided(const Tensor& self, IntList size, IntList stride, int64_t storage_offset) {
  // Create a fresh 0-element tensor with self's options, then point it at
  // self's storage with the requested geometry.
  auto result = at::empty({0}, self.options());
  setStorage(result, self.storage(), storage_offset, size, stride);
  return result;
}
155163

156164
Tensor &as_strided_(Tensor& self, IntList size, IntList stride, int64_t storage_offset) {
  // Restride self in place over its own storage.
  setStorage(self, self.storage(), storage_offset, size, stride);
  return self;
}
159173

160174
Tensor as_strided(const Tensor& self, IntList size, IntList stride) {
  // Overload that defaults the offset to self's current storage offset.
  const auto offset = self.storage_offset();
  return at::as_strided(self, size, stride, offset);
}
163177

164178
Tensor &as_strided_(Tensor& self, IntList size, IntList stride) {
  // Overload that defaults the offset; dispatches through the method form
  // of the in-place variant.
  const auto offset = self.storage_offset();
  return self.as_strided_(size, stride, offset);
}
167181

168182
Tensor narrow_copy_sparse(const Tensor& self, int64_t dim, int64_t start, int64_t length) {

aten/src/ATen/native/cuda/Resize.cuh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,13 @@ inline TensorImpl* resize_impl_cuda_(
3838
guard = DeviceGuard(self->storage().device().index());
3939
}
4040

41-
size_t storage_size = 1;
41+
int64_t storage_size = 1;
4242
if (stride) {
4343
self->set_sizes_and_strides(size, *stride);
4444
// NB: storage size can be different from numel.
4545
for (size_t dim = 0; dim < size.size(); ++dim) {
46+
// FIXME: Don't rely on storage_size being negative because this
47+
// may not be true for some edge cases.
4648
storage_size += (size[dim] - 1) * stride.value()[dim];
4749
}
4850
} else {

aten/src/ATen/native/native_functions.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,17 +201,21 @@
201201

202202
- func: as_strided(Tensor self, IntList size, IntList stride) -> Tensor
203203
variants: function, method
204+
device_guard: false
204205

205206
- func: as_strided_(Tensor self, IntList size, IntList stride) -> Tensor
206207
variants: function, method
208+
device_guard: false
207209

208210
- func: as_strided(Tensor self, IntList size, IntList stride, int64_t storage_offset) -> Tensor
209211
variants: function, method
212+
device_guard: false
210213
python_default_init:
211214
storage_offset: self.storage_offset()
212215

213216
- func: as_strided_(Tensor self, IntList size, IntList stride, int64_t storage_offset) -> Tensor
214217
variants: function, method
218+
device_guard: false
215219
python_default_init:
216220
storage_offset: self.storage_offset()
217221

0 commit comments

Comments
 (0)