46 changes: 38 additions & 8 deletions aten/src/ATen/Declarations.cwrap
@@ -1045,25 +1045,55 @@
   name: all
   types:
     - Byte
   variants:
     - method
     - function
   backends:
     - CPU
     - CUDA
-  cname: logicalall
-  return: real
-  arguments:
-    - THTensor* self
+  options:
+    - cname: logicalAndAll
+      return: real
+      arguments:
+        - THTensor* self
+    - cname: logicalAnd
+      return: argument 0
+      scalar_check: self_->isScalar() || (keepdim == false && self_->dim() == 1)
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - arg: long dim
+          wrap_dim: self
+        - arg: bool keepdim
+          default: "false"
 ]]
 [[
   name: any
   types:
     - Byte
   variants:
     - method
     - function
   backends:
     - CPU
     - CUDA
-  cname: logicalany
-  return: real
-  arguments:
-    - THTensor* self
+  options:
+    - cname: logicalAnyAll
+      return: real
+      arguments:
+        - THTensor* self
+    - cname: logicalAny
+      return: argument 0
+      scalar_check: self_->isScalar() || (keepdim == false && self_->dim() == 1)
+      arguments:
+        - arg: THTensor* result
+          output: True
+        - THTensor* self
+        - arg: long dim
+          wrap_dim: self
+        - arg: bool keepdim
+          default: "false"
 ]]
 [[
   name: getDevice
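Note: each `options:` entry above declares two overloads per name: a whole-tensor reduction (`logicalAndAll` / `logicalAnyAll`) that returns a scalar `real`, and a dimension-wise reduction (`logicalAnd` / `logicalAny`) that fills a result tensor, wraps negative `dim` values (`wrap_dim: self`), and defaults `keepdim` to false. For the Byte backend these bind to the TH functions declared in THTensorMath.h below; expanded out of the `THTensor_(...)` name macro, they read roughly as:

```c
/* Byte-type expansions of the TH entry points declared in THTensorMath.h
   below (THTensor_(name) expands to THByteTensor_name for this backend). */
int  THByteTensor_logicalAndAll(THByteTensor *self);            /* all()     */
int  THByteTensor_logicalAnyAll(THByteTensor *self);            /* any()     */
void THByteTensor_logicalAnd(THByteTensor *r_, THByteTensor *t,
                             int dimension, int keepdim);       /* all(dim)  */
void THByteTensor_logicalAny(THByteTensor *r_, THByteTensor *t,
                             int dimension, int keepdim);       /* any(dim)  */
```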
205 changes: 197 additions & 8 deletions aten/src/TH/generic/THTensorMath.c
@@ -3814,16 +3814,205 @@ LAB_IMPLEMENT_BASIC_FUNCTION(abs,abs)

 #if defined(TH_REAL_IS_BYTE)
 
-#define TENSOR_IMPLEMENT_LOGICAL_SUM(NAME, OP, INIT_VALUE) \
-int THTensor_(NAME)(THTensor *tensor) \
-{ \
-  int sum = INIT_VALUE; \
-  TH_TENSOR_APPLY(real, tensor, sum = sum OP *tensor_data;); \
-  return sum; \
-}
+int THTensor_(logicalAndAll)(THTensor *tensor)
+{
+  real prod = 1;
+  int serial_path = 0;
+#ifdef _OPENMP
+  int inOMP = omp_in_parallel();
+  if (inOMP) {
+    serial_path = 1;
+  } else {
+    TH_TENSOR_APPLY_REDUCTION_OMP(real, tensor, &&:prod, prod = prod && *tensor_data;);
+  }
+#else
+  serial_path = 1;
+#endif
+  if (serial_path) {
+    TH_TENSOR_APPLY(real, tensor, prod = prod && *tensor_data;);
+  }
+  return prod;
+}
+
+int THTensor_(logicalAnyAll)(THTensor *tensor)
+{
+  real sum = 0;
+  int serial_path = 0;
+#ifdef _OPENMP
+  int inOMP = omp_in_parallel();
+  if (inOMP) {
+    serial_path = 1;
+  } else {
+    TH_TENSOR_APPLY_REDUCTION_OMP(real, tensor, ||:sum, sum = sum || *tensor_data;);
+  }
+#else
+  serial_path = 1;
+#endif
+  if (serial_path) {
+    TH_TENSOR_APPLY(real, tensor, sum = sum || *tensor_data;);
+  }
+  return (int)sum;
+}
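Both whole-tensor reductions follow the same pattern: use an OpenMP reduction when not already inside a parallel region (nesting would oversubscribe threads), and fall back to the serial `TH_TENSOR_APPLY` path otherwise. Below is a minimal, self-contained sketch of that guard and of OpenMP's built-in `&&` reduction; `all_nonzero` is a hypothetical stand-in, not TH code:

```c
/* Minimal sketch of the pattern above: an OpenMP logical-AND reduction with
   a serial fallback when already inside a parallel region.
   Compile with: cc -fopenmp sketch.c */
#include <stdio.h>
#ifdef _OPENMP
#include <omp.h>
#endif

static int all_nonzero(const unsigned char *data, long n) {
  int acc = 1;                      /* 1 is the identity of && */
  long i;
#ifdef _OPENMP
  if (!omp_in_parallel()) {         /* same nesting guard as the code above */
    #pragma omp parallel for reduction(&&:acc)
    for (i = 0; i < n; i++)
      acc = acc && data[i];
    return acc;
  }
#endif
  for (i = 0; i < n; i++)           /* serial path */
    acc = acc && data[i];
  return acc;
}

int main(void) {
  unsigned char x[] = { 1, 2, 3, 0 };
  printf("all = %d\n", all_nonzero(x, 4));  /* prints 0: last element is zero */
  return 0;
}
```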

+void THTensor_(logicalAnd)(THTensor *r_, THTensor *t, int dimension, int keepdim)
+{
+  THLongStorage *dim;
+
+  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range",
+      dimension + TH_INDEX_BASE);
+
+  THTensor_(preserveReduceDimSemantics)(r_, THTensor_(nDimension)(t), dimension, keepdim);
+  dim = THTensor_(newSizeOf)(t);
+  THLongStorage_set(dim, dimension, 1);
+  THTensor_(resize)(r_, dim, NULL);
+  THLongStorage_free(dim);
+
+  int serial_path = 0;
+#ifdef _OPENMP
+  int inOMP = omp_in_parallel();
+  if (inOMP) {
+    serial_path = 1;
+  } else {
+    int r_Contig = THTensor_(isContiguous)(r_);
+    real *tp = THTensor_(data)(t);
+    real *rp = THTensor_(data)(r_);
+    if (r_Contig && (tp != rp)) {
+      ptrdiff_t iter = 0;
+      ptrdiff_t r_Size = THTensor_(nElement)(r_);
+      int r_Dim = r_->nDimension;
+      #pragma omp parallel for if (r_Size > TH_OMP_OVERHEAD_THRESHOLD)
+      for (iter = 0; iter < r_Size; iter++) {
+        int j;
+        int64_t quot;
+        int64_t rem = iter;
+        ptrdiff_t tBasicIndex = 0;
+
+        for (j = 0; j < r_Dim; ++j) {
+          if (j != dimension) {
+            quot = rem / r_->stride[j];
+            rem = rem % r_->stride[j];
+            tBasicIndex += quot * t->stride[j];
+          }
+        }
+        real *t_data = tp + tBasicIndex;
+        real *r__data = rp + iter;
+        *r__data = 1;
+        for (j = 0; j < t->size[dimension]; ++j) {
+          *r__data = *r__data && *(t_data + j * t->stride[dimension]);
+        }
+      }
+    } else {
+      serial_path = 1;
+    }
+  }
+#else
+  serial_path = 1;
+#endif
+
+  if (serial_path) {
+    // two implementations optimized for data locality
+    if (t->stride[dimension] == 1) {
+      TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,
+                           accreal prod = 1;
+                           int64_t i;
+                           for (i = 0; i < t_size; i++)
+                             prod = prod && t_data[i * t_stride];
+                           *r__data = (real)prod;);
+    } else {
+      THTensor_(fill)(r_, 1);
+      THTensor *temp_ = THTensor_(newWithTensor)(r_);
+      // r_.expand_as(t)
+      temp_->size[dimension] = t->size[dimension];
+      temp_->stride[dimension] = 0;
+
+      TH_TENSOR_APPLY2(real, temp_, real, t, *temp__data = *temp__data && *t_data;);
+      THTensor_(free)(temp_);
+    }
+  }
+  if (!keepdim) {
+    THTensor_(squeeze1d)(r_, r_, dimension);
+  }
+}
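The parallel branch assigns each OpenMP iteration one output element: the flat index into the contiguous result is decoded into coordinates by successive div/mod against `r_`'s strides (skipping the reduced dimension, whose coordinate is always 0), then re-encoded against `t`'s strides to find the base of the slice to fold. A standalone sketch of that mapping, with illustrative strides rather than TH tensors:

```c
/* Sketch of the flat-index decoding used in the OpenMP branch above. Maps a
   flat index into the (contiguous) result tensor to the element offset of
   the matching slice in the source tensor. Strides below are illustrative. */
#include <stdio.h>

static long slice_base(long flat, int ndim, int red_dim,
                       const long *r_stride, const long *t_stride) {
  long rem = flat, off = 0;
  int j;
  for (j = 0; j < ndim; j++) {
    if (j == red_dim) continue;   /* reduced dim has extent 1 in the result */
    off += (rem / r_stride[j]) * t_stride[j];  /* coordinate * source stride */
    rem %= r_stride[j];
  }
  return off;
}

int main(void) {
  /* t is 3x4 row-major (strides {4,1}); reducing dim 1 gives a 3x1 result
     with contiguous strides {1,1}. Output element i starts at t offset 4*i. */
  long t_stride[] = { 4, 1 }, r_stride[] = { 1, 1 };
  long i;
  for (i = 0; i < 3; i++)
    printf("result[%ld] folds the slice starting at t offset %ld\n",
           i, slice_base(i, 2, 1, r_stride, t_stride));
  return 0;
}
```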

+void THTensor_(logicalAny)(THTensor *r_, THTensor *t, int dimension, int keepdim)
+{
+  THLongStorage *dim;
+
+  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension %d out of range",
+      dimension + TH_INDEX_BASE);
+
+  THTensor_(preserveReduceDimSemantics)(r_, THTensor_(nDimension)(t), dimension, keepdim);
+  dim = THTensor_(newSizeOf)(t);
+  THLongStorage_set(dim, dimension, 1);
+  THTensor_(resize)(r_, dim, NULL);
+  THLongStorage_free(dim);
+
-TENSOR_IMPLEMENT_LOGICAL_SUM(logicalall, &&, 1)
-TENSOR_IMPLEMENT_LOGICAL_SUM(logicalany, ||, 0)
+  int serial_path = 0;
+#ifdef _OPENMP
+  int inOMP = omp_in_parallel();
+  if (inOMP) {
+    serial_path = 1;
+  } else {
+    int r_Contig = THTensor_(isContiguous)(r_);
+    real *tp = THTensor_(data)(t);
+    real *rp = THTensor_(data)(r_);
+    if (r_Contig && (tp != rp)) {
+      ptrdiff_t iter = 0;
+      ptrdiff_t r_Size = THTensor_(nElement)(r_);
+      int r_Dim = r_->nDimension;
+      #pragma omp parallel for if (r_Size > TH_OMP_OVERHEAD_THRESHOLD)
+      for (iter = 0; iter < r_Size; iter++) {
+        int j;
+        int64_t quot;
+        int64_t rem = iter;
+        ptrdiff_t tBasicIndex = 0;
+
+        for (j = 0; j < r_Dim; ++j) {
+          if (j != dimension) {
+            quot = rem / r_->stride[j];
+            rem = rem % r_->stride[j];
+            tBasicIndex += quot * t->stride[j];
+          }
+        }
+        real *t_data = tp + tBasicIndex;
+        real *r__data = rp + iter;
+        *r__data = 0;
+        for (j = 0; j < t->size[dimension]; ++j) {
+          *r__data = *r__data || *(t_data + j * t->stride[dimension]);
+        }
+      }
+    } else {
+      serial_path = 1;
+    }
+  }
+#else
+  serial_path = 1;
+#endif
+  if (serial_path) {
+    // two implementations optimized for data locality
+    if (t->stride[dimension] == 1) {
+      TH_TENSOR_DIM_APPLY2(real, t, real, r_, dimension,
+                           accreal sum = 0;
+                           int64_t i;
+                           for (i = 0; i < t_size; i++)
+                             sum = sum || t_data[i * t_stride];
+                           *r__data = (real)sum;);
+    } else {
+      THTensor_(zero)(r_);
+      THTensor *temp_ = THTensor_(newWithTensor)(r_);
+      // r_.expand_as(t)
+      temp_->size[dimension] = t->size[dimension];
+      temp_->stride[dimension] = 0;
+
+      TH_TENSOR_APPLY2(real, temp_, real, t, *temp__data = *temp__data || *t_data;);
+      THTensor_(free)(temp_);
+    }
+  }
+
+  if (!keepdim) {
+    THTensor_(squeeze1d)(r_, r_, dimension);
+  }
+}
 
 #endif /* Byte only part */
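In the serial fallback, the non-unit-stride case avoids a strided gather per output element: the result is pre-filled with the reduction's identity, then viewed with stride 0 along the reduced dimension, so a single elementwise `TH_TENSOR_APPLY2` pass folds every source element into its matching output slot. A plain-C illustration of that stride-0 aliasing trick (shapes and strides here are made up):

```c
/* Sketch of the stride-0 "expand" trick in the serial fallback above: giving
   the accumulator a zero stride along the reduced dim makes one output cell
   alias every position along that dim, so an elementwise pass reduces.
   Illustrated with a 3x4 row-major array, reducing dim 1 (logical any). */
#include <stdio.h>

int main(void) {
  unsigned char t[3][4] = { {0,0,0,0}, {0,0,1,0}, {1,1,1,1} };
  unsigned char any[3] = { 0, 0, 0 };   /* pre-filled with ||'s identity */
  long stride0 = 1, stride1 = 0;        /* result strides; 0 on reduced dim */
  int i, j;
  for (i = 0; i < 3; i++)
    for (j = 0; j < 4; j++)             /* plain elementwise pass over t... */
      any[i*stride0 + j*stride1] = any[i*stride0 + j*stride1] || t[i][j];
  for (i = 0; i < 3; i++)               /* ...accumulates into any[i] */
    printf("any[%d] = %d\n", i, any[i]);  /* 0 1 1 */
  return 0;
}
```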

6 changes: 4 additions & 2 deletions aten/src/TH/generic/THTensorMath.h
@@ -206,8 +206,10 @@ TH_API void THTensor_(dirichlet_grad)(THTensor *self, THTensor *x, THTensor *alp

 #if defined(TH_REAL_IS_BYTE)
 
-TH_API int THTensor_(logicalall)(THTensor *self);
-TH_API int THTensor_(logicalany)(THTensor *self);
+TH_API int THTensor_(logicalAndAll)(THTensor *self);
+TH_API int THTensor_(logicalAnyAll)(THTensor *self);
+TH_API void THTensor_(logicalAnd)(THTensor *r_, THTensor *t, int dimension, int keepdim);
+TH_API void THTensor_(logicalAny)(THTensor *r_, THTensor *t, int dimension, int keepdim);
 
 #endif /* TH_REAL_IS_BYTE */

7 changes: 5 additions & 2 deletions aten/src/THC/THCTensorMath.h
@@ -49,7 +49,10 @@
#include "generic/THCTensorTopK.h"
#include "THCGenerateAllTypes.h"

THC_API int THCudaByteTensor_logicalall(THCState *state, THCudaByteTensor *self);
THC_API int THCudaByteTensor_logicalany(THCState *state, THCudaByteTensor *self);
THC_API int THCudaByteTensor_logicalAndAll(THCState *state, THCudaByteTensor *self);
THC_API int THCudaByteTensor_logicalAnyAll(THCState *state, THCudaByteTensor *self);

THC_API void THCudaByteTensor_logicalAnd(THCState *state, THCudaByteTensor *self, THCudaByteTensor *src, int dimension, int keepdim);
THC_API void THCudaByteTensor_logicalAny(THCState *state, THCudaByteTensor *self, THCudaByteTensor *src, int dimension, int keepdim);

#endif
36 changes: 34 additions & 2 deletions aten/src/THC/THCTensorMathReduce.cu
@@ -1,7 +1,7 @@
#include "THCTensorMathReduce.cuh"

THC_API int
THCudaByteTensor_logicalall(THCState *state, THCudaByteTensor *self) {
THCudaByteTensor_logicalAndAll(THCState *state, THCudaByteTensor *self) {
THCAssertSameGPU(THCudaByteTensor_checkGPU(state, 1, self));
unsigned char result;
if (!THC_reduceAll(state, self,
Expand All @@ -16,7 +16,7 @@ THCudaByteTensor_logicalall(THCState *state, THCudaByteTensor *self) {
}

THC_API int
THCudaByteTensor_logicalany(THCState *state, THCudaByteTensor *self) {
THCudaByteTensor_logicalAnyAll(THCState *state, THCudaByteTensor *self) {
THCAssertSameGPU(THCudaByteTensor_checkGPU(state, 1, self));
unsigned char result;
if (!THC_reduceAll(state, self,
@@ -29,3 +29,35 @@ THCudaByteTensor_logicalany(THCState *state, THCudaByteTensor *self) {

   return (int) result;
 }
+
+THC_API void
+THCudaByteTensor_logicalAnd(THCState* state, THCudaByteTensor *self, THCudaByteTensor *src, int dimension, int keepdim) {
+  THCAssertSameGPU(THCudaByteTensor_checkGPU(state, 2, self, src));
+  if (!THC_reduceDim(state, self, src,
+                     thrust::identity<unsigned char>(),
+                     LogicalAll(),
+                     LogicalAll(),
+                     (unsigned char) 1,
+                     dimension,
+                     keepdim)) {
+    THArgCheck(false, 2, CUTORCH_DIM_WARNING);
+  }
+
+  THCudaCheck(cudaGetLastError());
+}
+
+THC_API void
+THCudaByteTensor_logicalAny(THCState* state, THCudaByteTensor *self, THCudaByteTensor *src, int dimension, int keepdim) {
+  THCAssertSameGPU(THCudaByteTensor_checkGPU(state, 2, self, src));
+  if (!THC_reduceDim(state, self, src,
+                     thrust::identity<unsigned char>(),
+                     LogicalAny(),
+                     LogicalAny(),
+                     (unsigned char) 0,
+                     dimension,
+                     keepdim)) {
+    THArgCheck(false, 2, CUTORCH_DIM_WARNING);
+  }
+
+  THCudaCheck(cudaGetLastError());
+}
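The CUDA side hands each reduction to THC's generic kernels as a triple: a per-element transform (identity here), a binary combine op (the `LogicalAll` / `LogicalAny` functors), and an init value that is the combine op's identity, 1 for AND and 0 for OR, so empty slices come out as the right defaults (`all()` of an empty tensor is true, `any()` false, as `test_all_any_empty` checks). A host-side sketch of that contract in plain C; the names are illustrative, not the THC API:

```c
/* Host-side sketch of the (transform, combine, init) decomposition that the
   THC_reduceAll/THC_reduceDim calls above rely on. Plain-C illustration;
   nothing here is the actual THC kernel machinery. */
#include <stdio.h>

typedef unsigned char u8;

static u8 identity (u8 x)       { return x; }       /* per-element transform */
static u8 logic_and(u8 a, u8 b) { return a && b; }  /* LogicalAll analogue   */
static u8 logic_or (u8 a, u8 b) { return a || b; }  /* LogicalAny analogue   */

static u8 fold(const u8 *x, long n, u8 (*xf)(u8),
               u8 (*op)(u8, u8), u8 init) {
  u8 acc = init;                /* must be op's identity: 1 for &&, 0 for || */
  long i;
  for (i = 0; i < n; i++)
    acc = op(acc, xf(x[i]));
  return acc;
}

int main(void) {
  u8 x[] = { 1, 1, 0, 1 };
  printf("all=%d any=%d\n",
         fold(x, 4, identity, logic_and, 1),   /* 0 */
         fold(x, 4, identity, logic_or,  0));  /* 1 */
  /* With n == 0 both folds return their init values, matching the empty-
     tensor convention tested in test_all_any_empty. */
  return 0;
}
```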
17 changes: 17 additions & 0 deletions test/test_torch.py
@@ -628,6 +628,23 @@ def test_all_any_empty(self):
         self.assertTrue(x.all())
         self.assertFalse(x.any())
 
+    def test_all_any_with_dim(self):
+        def test(x):
+            r1 = x.prod(dim=0, keepdim=False)
+            r2 = x.all(dim=0, keepdim=False)
+            self.assertEqual(r1.shape, r2.shape)
+            self.assertTrue((r1 == r2).all())
+
+            r3 = x.sum(dim=1, keepdim=True).clamp(0, 1)
+            r4 = x.any(dim=1, keepdim=True)
+            self.assertEqual(r3.shape, r4.shape)
+            self.assertTrue((r3 == r4).all())
+
+        test(torch.ByteTensor([[0, 0, 0],
+                               [0, 0, 1],
+                               [0, 1, 1],
+                               [1, 1, 1]]))
+

     @unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
     def test_all_any_empty_cuda(self):
         x = torch.cuda.ByteTensor()