-
Notifications
You must be signed in to change notification settings - Fork 26.3k
Move abs, frac, reciprocal, and neg to TensorIterator #19041
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -408,6 +408,8 @@ class CAFFE2_API Tensor { | |
| Tensor & fill_(const Tensor & value); | ||
| Tensor floor() const; | ||
| Tensor & floor_(); | ||
| Tensor frac() const; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why are these moving around?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I moved the declarations in native_functions.yaml so that they're not under this comment: https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/native/native_functions.yaml#L3182 |
||
| Tensor & frac_(); | ||
| Tensor ger(const Tensor & vec2) const; | ||
| Tensor fft(int64_t signal_ndim, bool normalized=false) const; | ||
| Tensor ifft(int64_t signal_ndim, bool normalized=false) const; | ||
|
|
@@ -465,6 +467,10 @@ class CAFFE2_API Tensor { | |
| Tensor permute(IntArrayRef dims) const; | ||
| Tensor pin_memory() const; | ||
| Tensor pinverse(double rcond=1e-15) const; | ||
| Tensor reciprocal() const; | ||
| Tensor & reciprocal_(); | ||
| Tensor neg() const; | ||
| Tensor & neg_(); | ||
| Tensor repeat(IntArrayRef repeats) const; | ||
| Tensor repeat_interleave(const Tensor & repeats, c10::optional<int64_t> dim=c10::nullopt) const; | ||
| Tensor repeat_interleave(int64_t repeats, c10::optional<int64_t> dim=c10::nullopt) const; | ||
|
|
@@ -647,10 +653,7 @@ class CAFFE2_API Tensor { | |
| Tensor & digamma_(); | ||
| Tensor & polygamma_(int64_t n); | ||
| Tensor & erfinv_(); | ||
| Tensor & frac_(); | ||
| Tensor & renorm_(Scalar p, int64_t dim, Scalar maxnorm); | ||
| Tensor & reciprocal_(); | ||
| Tensor & neg_(); | ||
| Tensor & pow_(Scalar exponent); | ||
| Tensor & pow_(const Tensor & exponent); | ||
| Tensor & lerp_(const Tensor & end, Scalar weight); | ||
|
|
@@ -717,10 +720,7 @@ class CAFFE2_API Tensor { | |
| Tensor digamma() const; | ||
| Tensor polygamma(int64_t n) const; | ||
| Tensor erfinv() const; | ||
| Tensor frac() const; | ||
| Tensor dist(const Tensor & other, Scalar p=2) const; | ||
| Tensor reciprocal() const; | ||
| Tensor neg() const; | ||
| Tensor atan2(const Tensor & other) const; | ||
| Tensor lerp(const Tensor & end, Scalar weight) const; | ||
| Tensor lerp(const Tensor & end, const Tensor & weight) const; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -96,6 +96,8 @@ struct Vec256<int64_t> : public Vec256i { | |
| auto inverse = _mm256_xor_si256(values, is_larger); | ||
| return _mm256_sub_epi64(inverse, is_larger); | ||
| } | ||
| Vec256<int64_t> frac() const; | ||
| Vec256<int64_t> neg() const; | ||
| Vec256<int64_t> operator==(const Vec256<int64_t>& other) const { | ||
| return _mm256_cmpeq_epi64(values, other.values); | ||
| } | ||
|
|
@@ -185,6 +187,8 @@ struct Vec256<int32_t> : public Vec256i { | |
| Vec256<int32_t> abs() const { | ||
| return _mm256_abs_epi32(values); | ||
| } | ||
| Vec256<int32_t> frac() const; | ||
| Vec256<int32_t> neg() const; | ||
| Vec256<int32_t> operator==(const Vec256<int32_t>& other) const { | ||
| return _mm256_cmpeq_epi32(values, other.values); | ||
| } | ||
|
|
@@ -369,6 +373,8 @@ struct Vec256<int16_t> : public Vec256i { | |
| Vec256<int16_t> abs() const { | ||
| return _mm256_abs_epi16(values); | ||
| } | ||
| Vec256<int16_t> frac() const; | ||
| Vec256<int16_t> neg() const; | ||
| Vec256<int16_t> operator==(const Vec256<int16_t>& other) const { | ||
| return _mm256_cmpeq_epi16(values, other.values); | ||
| } | ||
|
|
@@ -419,6 +425,19 @@ Vec256<int16_t> inline operator-(const Vec256<int16_t>& a, const Vec256<int16_t> | |
| return _mm256_sub_epi16(a, b); | ||
| } | ||
|
|
||
| // Negation. Defined here so we can utilize operator- | ||
| Vec256<int64_t> Vec256<int64_t>::neg() const { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. An XOR instruction also exists for integers (AVX2 and later); it could aid further optimization here. |
||
| return Vec256<int64_t>(0) - *this; | ||
| } | ||
|
|
||
| Vec256<int32_t> Vec256<int32_t>::neg() const { | ||
| return Vec256<int32_t>(0) - *this; | ||
| } | ||
|
|
||
| Vec256<int16_t> Vec256<int16_t>::neg() const { | ||
| return Vec256<int16_t>(0) - *this; | ||
| } | ||
|
|
||
| // Emulate operations with no native 64-bit support in avx, | ||
| // by extracting each element, performing the operation pointwise, | ||
| // then combining the results into a vector. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Why can't you kill the entire entry instead of just the CPU one? How are these called?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
They're called from the stubs in CUDAUnaryOps.cpp
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually turns out they're not so I'm gonna delete these