Skip to content

Commit 24e958a

Browse files
ssnl authored and facebook-github-bot committed
Move bernoulli into ATen (#10273)
Summary: + #10236 : torch.bernoulli's out kwarg is broken fixed in moving `bernoulli_out` to ATen + #9917 : BUG torch.bernoulli(p.expand(shape)) is broken fixed in moving all `bernoulli` ops in ATen to use the modern apply utils methods + #10357 : torch.bernoulli inconsistent gpu/cpu results fixed by adding CUDA asserts In order to use `curand_uniform4`, I made some changes to `CUDAApplyUtils.cuh`. Specifically, I introduced an optional template parameter `int step` to the `CUDA_tensor_applyN` methods, representing that we want to process `step` values at each time for each of the `N` tensors. The calling convention for `step = 1` (default) isn't changed. But if `step > 1`, the given lambda `op` must take in `int n` as its first argument, representing the number of valid values, because there may not be full `step` values at the boundary. E.g., here is what the `bernoulli(self, p_tensor)` call look like: ```cpp // The template argument `4` below indicates that we want to operate on four // element at each time. See NOTE [ CUDA_tensor_applyN helpers ] for details. 
at::cuda::CUDA_tensor_apply2<scalar_t, prob_t, 4>( ret, p, [seeds] __device__( int n, scalar_t& v1, scalar_t& v2, scalar_t& v3, scalar_t& v4, const prob_t& p1, const prob_t& p2, const prob_t& p3, const prob_t& p4) { curandStatePhilox4_32_10_t state; curand_init( seeds.first, blockIdx.x * blockDim.x + threadIdx.x, seeds.second, &state); float4 rand = curand_uniform4(&state); switch (n) { case 4: { assert(0 <= p4 && p4 <= 1); v4 = static_cast<scalar_t>(rand.w <= p4); } case 3: { assert(0 <= p3 && p3 <= 1); v3 = static_cast<scalar_t>(rand.z <= p3); } case 2: { assert(0 <= p2 && p2 <= 1); v2 = static_cast<scalar_t>(rand.y <= p2); } case 1: { assert(0 <= p1 && p1 <= 1); v1 = static_cast<scalar_t>(rand.x <= p1); } } } ); ``` Benchmarking on `torch.rand(200, 300, 400)` 20 times, each time with 20 loops: post patch ``` ➜ ~ numactl --cpunodebind 1 --membind 1 -- taskset -c 12,13,14,15,16,17,18,19,20,21,22,23 env CUDA_LAUNCH_BLOCKING=1 python bern.py torch.bernoulli(x) 6.841588497161865 +- 0.05413117632269859 torch.bernoulli(xc) 0.05963418632745743 +- 0.0008014909108169377 x.bernoulli_() 0.4024486541748047 +- 0.0021550932433456182 xc.bernoulli_() 0.02167394384741783 +- 2.3818030967959203e-05 ``` pre-patch ``` ➜ ~ numactl --cpunodebind 1 --membind 1 -- taskset -c 12,13,14,15,16,17,18,19,20,21,22,23 env CUDA_LAUNCH_BLOCKING=1 python bern.py torch.bernoulli(x) 12.394511222839355 +- 0.0966421514749527 torch.bernoulli(xc) 0.08970972150564194 +- 0.0038722590543329716 x.bernoulli_() 1.654480218887329 +- 0.02364428900182247 xc.bernoulli_() 0.058352887630462646 +- 0.003094920190051198 ``` Pull Request resolved: #10273 Differential Revision: D9831294 Pulled By: SsnL fbshipit-source-id: 65e0655a36b90d5278b675d35cb5327751604088
1 parent cf5a21e commit 24e958a

37 files changed

+1098
-623
lines changed

aten/src/ATen/CPUApplyUtils.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ inline std::string _all_equal_numel_error(at::ArrayRef<Tensor> tensors) {
207207
for (size_t i = 0; i < tensors.size() - 1; i++) {
208208
oss << tensors[i].sizes() << ", ";
209209
}
210-
oss << "and " << tensors[tensors.size() - 1]
210+
oss << "and " << tensors[tensors.size() - 1].sizes()
211211
<< " to have the same number of elements, but got ";
212212
for (size_t i = 0; i < tensors.size() - 1; i++) {
213213
oss << tensors[i].numel() << ", ";
@@ -220,7 +220,7 @@ inline std::string _all_equal_numel_error(at::ArrayRef<Tensor> tensors) {
220220
inline bool _apply_preamble(ArrayRef<Tensor> tensors) {
221221
checkBackend("CPU_tensor_apply", tensors, Backend::CPU);
222222
if (!_all_equal_numel(tensors))
223-
throw std::runtime_error(_all_equal_numel_error(tensors));
223+
AT_ERROR(_all_equal_numel_error(tensors));
224224
// An empty tensor has no elements
225225
for (auto& t : tensors)
226226
if (t.numel() == 0)

aten/src/ATen/Declarations.cwrap

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3218,38 +3218,6 @@
32183218
kwarg_only: True
32193219
- double p
32203220
]]
3221-
[[
3222-
name: _bernoulli_
3223-
backends:
3224-
- CPU
3225-
- CUDA
3226-
cname: bernoulli
3227-
return: self
3228-
variants: function
3229-
arguments:
3230-
- THTensor* self
3231-
- arg: THGenerator* generator
3232-
default: nullptr
3233-
kwarg_only: True
3234-
- double p
3235-
]]
3236-
[[
3237-
name: _th_bernoulli
3238-
types:
3239-
- Float
3240-
- Double
3241-
return: argument 0
3242-
variants: function
3243-
cname: bernoulli_Tensor
3244-
arguments:
3245-
- arg: THTensor* output
3246-
output: True
3247-
resize: self
3248-
- arg: THGenerator* generator
3249-
default: nullptr
3250-
kwarg_only: True
3251-
- THTensor* self
3252-
]]
32533221
[[
32543222
name: _dirichlet_grad
32553223
types:

aten/src/ATen/core/Tensor.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -441,12 +441,10 @@ struct AT_API Tensor {
441441
Tensor & atan_();
442442
Tensor baddbmm(const Tensor & batch1, const Tensor & batch2, Scalar beta=1, Scalar alpha=1) const;
443443
Tensor & baddbmm_(const Tensor & batch1, const Tensor & batch2, Scalar beta=1, Scalar alpha=1);
444-
Tensor bernoulli(const Tensor & p, Generator * generator=nullptr) const;
445-
Tensor bernoulli(double p, Generator * generator=nullptr) const;
446-
Tensor bernoulli() const;
444+
Tensor bernoulli(Generator * generator=nullptr) const;
447445
Tensor & bernoulli_(const Tensor & p, Generator * generator=nullptr);
448-
Tensor & bernoulli_(double p, Generator * generator=nullptr);
449-
Tensor & bernoulli_();
446+
Tensor & bernoulli_(double p=0.5, Generator * generator=nullptr);
447+
Tensor bernoulli(double p, Generator * generator=nullptr) const;
450448
Tensor bincount(const Tensor & weights={}, int64_t minlength=0) const;
451449
Tensor bmm(const Tensor & mat2) const;
452450
Tensor ceil() const;

aten/src/ATen/core/TensorMethods.h

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -605,23 +605,17 @@ inline Tensor Tensor::baddbmm(const Tensor & batch1, const Tensor & batch2, Scal
605605
inline Tensor & Tensor::baddbmm_(const Tensor & batch1, const Tensor & batch2, Scalar beta, Scalar alpha) {
606606
return type().baddbmm_(*this, batch1, batch2, beta, alpha);
607607
}
608-
inline Tensor Tensor::bernoulli(const Tensor & p, Generator * generator) const {
609-
return type().bernoulli(*this, p, generator);
610-
}
611-
inline Tensor Tensor::bernoulli(double p, Generator * generator) const {
612-
return type().bernoulli(*this, p, generator);
613-
}
614-
inline Tensor Tensor::bernoulli() const {
615-
return type().bernoulli(*this);
608+
inline Tensor Tensor::bernoulli(Generator * generator) const {
609+
return type().bernoulli(*this, generator);
616610
}
617611
inline Tensor & Tensor::bernoulli_(const Tensor & p, Generator * generator) {
618612
return type().bernoulli_(*this, p, generator);
619613
}
620614
inline Tensor & Tensor::bernoulli_(double p, Generator * generator) {
621615
return type().bernoulli_(*this, p, generator);
622616
}
623-
inline Tensor & Tensor::bernoulli_() {
624-
return type().bernoulli_(*this);
617+
inline Tensor Tensor::bernoulli(double p, Generator * generator) const {
618+
return type().bernoulli(*this, p, generator);
625619
}
626620
inline Tensor Tensor::bincount(const Tensor & weights, int64_t minlength) const {
627621
return type().bincount(*this, weights, minlength);

aten/src/ATen/core/Type.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -397,12 +397,10 @@ struct AT_API Type {
397397
virtual Tensor & atan_(Tensor & self) const = 0;
398398
virtual Tensor baddbmm(const Tensor & self, const Tensor & batch1, const Tensor & batch2, Scalar beta, Scalar alpha) const = 0;
399399
virtual Tensor & baddbmm_(Tensor & self, const Tensor & batch1, const Tensor & batch2, Scalar beta, Scalar alpha) const = 0;
400-
virtual Tensor bernoulli(const Tensor & self, const Tensor & p, Generator * generator) const = 0;
401-
virtual Tensor bernoulli(const Tensor & self, double p, Generator * generator) const = 0;
402-
virtual Tensor bernoulli(const Tensor & self) const = 0;
400+
virtual Tensor bernoulli(const Tensor & self, Generator * generator) const = 0;
403401
virtual Tensor & bernoulli_(Tensor & self, const Tensor & p, Generator * generator) const = 0;
404402
virtual Tensor & bernoulli_(Tensor & self, double p, Generator * generator) const = 0;
405-
virtual Tensor & bernoulli_(Tensor & self) const = 0;
403+
virtual Tensor bernoulli(const Tensor & self, double p, Generator * generator) const = 0;
406404
virtual Tensor bincount(const Tensor & self, const Tensor & weights, int64_t minlength) const = 0;
407405
virtual Tensor bmm(const Tensor & self, const Tensor & mat2) const = 0;
408406
virtual Tensor ceil(const Tensor & self) const = 0;

aten/src/ATen/cpu/vec256/vec256_base.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,4 +438,14 @@ interleave2(const Vec256<T>& a, const Vec256<T>& b) {
438438
Vec256<T>::loadu(static_cast<void*>(buffer2)));
439439
}
440440

441+
template <typename src_T, typename dst_T>
442+
void convert(const src_T *src, dst_T *dst, int64_t n) {
443+
#pragma unroll
444+
for (int64_t i = 0; i < n; i++) {
445+
*dst = static_cast<dst_T>(*src);
446+
src++;
447+
dst++;
448+
}
449+
}
450+
441451
}}}

aten/src/ATen/cpu/vec256/vec256_int.h

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,38 @@ struct Vec256<int32_t> : public Vec256i {
208208
}
209209
};
210210

211+
template <>
212+
void convert(const int32_t *src, float *dst, int64_t n) {
213+
int64_t i;
214+
// int32_t and float have same size
215+
#pragma unroll
216+
for (i = 0; i <= (n - Vec256<int32_t>::size); i += Vec256<int32_t>::size) {
217+
auto input_vec = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(src + i));
218+
auto output_vec = _mm256_cvtepi32_ps(input_vec);
219+
_mm256_storeu_ps(reinterpret_cast<float*>(dst + i), output_vec);
220+
}
221+
#pragma unroll
222+
for (; i < n; i++) {
223+
dst[i] = static_cast<float>(src[i]);
224+
}
225+
}
226+
227+
template <>
228+
void convert(const int32_t *src, double *dst, int64_t n) {
229+
int64_t i;
230+
// int32_t has half the size of double
231+
#pragma unroll
232+
for (i = 0; i <= (n - Vec256<double>::size); i += Vec256<double>::size) {
233+
auto input_128_vec = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src + i));
234+
auto output_vec = _mm256_cvtepi32_pd(input_128_vec);
235+
_mm256_storeu_pd(reinterpret_cast<double*>(dst + i), output_vec);
236+
}
237+
#pragma unroll
238+
for (; i < n; i++) {
239+
dst[i] = static_cast<double>(src[i]);
240+
}
241+
}
242+
211243
template <>
212244
struct Vec256<int16_t> : public Vec256i {
213245
static constexpr int size = 16;

0 commit comments

Comments (0)