Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions aten/src/THC/THCTensorMathPairwise.cu
Original file line number Diff line number Diff line change
Expand Up @@ -375,8 +375,8 @@ struct TensorTriOp {
TensorTriOp(T *start_, int64_t stride0_, int64_t stride1_, int64_t k_)
: start(start_), stride0(stride0_), stride1(stride1_), k(k_) {}

__device__ __forceinline__ int mask(T *in) {
ptrdiff_t n = in - start;
__device__ __forceinline__ int mask(T *out) {
ptrdiff_t n = out - start;
int64_t row, col;
if (stride0 > stride1)
{
Expand All @@ -393,7 +393,7 @@ struct TensorTriOp {
}

__device__ __forceinline__ void operator()(T* out, T* in) {
*out = mask(in) ? *in : ScalarConvert<int, T>::to(0);
*out = mask(out) ? *in : ScalarConvert<int, T>::to(0);
}

__device__ __forceinline__ void operator()(T* v) {
Expand Down
39 changes: 15 additions & 24 deletions aten/src/THC/generic/THCTensorMathPairwise.cu
Original file line number Diff line number Diff line change
Expand Up @@ -193,31 +193,27 @@ void THCTensor_(tril)(THCState *state, THCTensor *self_, THCTensor *src_, int64_
THCAssertSameGPU(THCTensor_(checkGPU)(state, 2, self_, src_));
THArgCheck(src_->nDimension == 2, 1, "expected a matrix");

THCTensor *src = src_;
if (self_ == src_)
src = THCTensor_(newContiguous)(state, src_);
if (self_ != src_)
THCTensor_(resizeAs)(state, self_, src_);

int64_t stride0 = src->stride[0];
int64_t stride1 = src->stride[1];
real *start = THCTensor_(data)(state, src);
int64_t stride0 = self_->stride[0];
int64_t stride1 = self_->stride[1];
real *start = THCTensor_(data)(state, self_);

TensorTriOp<real, 0> op(start, stride0, stride1, k);

if (self_ == src_) {
if (!THC_pointwiseApply1(state, src, op)) {
if (!THC_pointwiseApply1(state, src_, op)) {
THArgCheck(false, 2, CUTORCH_DIM_WARNING);
}
} else {
THCTensor_(resizeAs)(state, self_, src);
THCTensor_(resizeAs)(state, self_, src_);

if (!THC_pointwiseApply2(state, self_, src, op)) {
if (!THC_pointwiseApply2(state, self_, src_, op)) {
THArgCheck(false, 2, CUTORCH_DIM_WARNING);
}
}

if (self_ == src_)
THCTensor_(freeCopyTo)(state, src, src_);

THCudaCheck(cudaGetLastError());
}

Expand All @@ -226,31 +222,26 @@ void THCTensor_(triu)(THCState *state, THCTensor *self_, THCTensor *src_, int64_
THCAssertSameGPU(THCTensor_(checkGPU)(state, 2, self_, src_));
THArgCheck(src_->nDimension == 2, 1, "expected a matrix");

THCTensor *src = src_;
if (self_ == src_)
src = THCTensor_(newContiguous)(state, src_);
if (self_ != src_)
THCTensor_(resizeAs)(state, self_, src_);

int64_t stride0 = src->stride[0];
int64_t stride1 = src->stride[1];
real *start = THCTensor_(data)(state, src);
int64_t stride0 = self_->stride[0];
int64_t stride1 = self_->stride[1];
real *start = THCTensor_(data)(state, self_);

TensorTriOp<real, 1> op(start, stride0, stride1, k);

if (self_ == src_) {
if (!THC_pointwiseApply1(state, src, op)) {
if (!THC_pointwiseApply1(state, src_, op)) {
THArgCheck(false, 2, CUTORCH_DIM_WARNING);
}
} else {
THCTensor_(resizeAs)(state, self_, src);

if (!THC_pointwiseApply2(state, self_, src, op)) {
if (!THC_pointwiseApply2(state, self_, src_, op)) {
THArgCheck(false, 2, CUTORCH_DIM_WARNING);
}
}

if (self_ == src_)
THCTensor_(freeCopyTo)(state, src, src_);

THCudaCheck(cudaGetLastError());
}

Expand Down
28 changes: 25 additions & 3 deletions test/test_cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ def medium_2d(t):
return make_tensor(t, M, M)


def medium_2d_expanded(t):
return t(1).expand(M, M)


def medium_2d_scaled(t, scale=10):
return make_tensor(t, M, M).mul(scale)

Expand Down Expand Up @@ -143,6 +147,13 @@ def tmp(t):
return t(*sizes).copy_(torch.randn(*sizes))
return tmp

# Content of each tuple:
# - function name
# - constructor for the tensor, signature: fn(tensor_type) -> tensor
# - constructor for the arguments, signature: fn(tensor_type) -> list
# - postfix name for the test (must be unique for a given function) (default='')
# - tensor types to use (default=types)
# - disable inplace test, if set to True, no inplace test will be done (default=False)
tests = [
('add', small_3d, lambda t: [number(3.14, 3, t)]),
('add', small_3d, lambda t: [small_3d_positive(t)], 'tensor'),
Expand Down Expand Up @@ -296,9 +307,11 @@ def tmp(t):
('topk', small_3d_unique, lambda t: [2, 1, True, True], 'dim_desc_sort'),
('trace', medium_2d, lambda t: [],),
('tril', medium_2d, lambda t: [],),
('tril', medium_2d_expanded, lambda t: [], 'zero_stride', types, True),
('tril', medium_2d, lambda t: [2], 'positive'),
('tril', medium_2d, lambda t: [-2], 'negative'),
('triu', medium_2d, lambda t: [],),
('triu', medium_2d_expanded, lambda t: [], 'zero_stride', types, True),
('triu', medium_2d, lambda t: [2], 'positive'),
('triu', medium_2d, lambda t: [-2], 'negative'),
('unsqueeze', new_t(2, 3, 4), lambda t: [2],),
Expand Down Expand Up @@ -1351,18 +1364,27 @@ def test_nvtx(self):
for t in types:
tensor = t()
gpu_tensor = get_gpu_type(t)()

# Default values
desc = ''
type_subset = types
no_inplace = False
if len(decl) == 3:
name, constr, arg_constr = decl
desc = ''
elif len(decl) == 4:
name, constr, arg_constr, desc = decl
elif len(decl) == 5:
name, constr, arg_constr, desc, type_subset = decl
if t not in type_subset:
continue
elif len(decl) == 6:
name, constr, arg_constr, desc, type_subset, no_inplace = decl

if t not in type_subset:
continue

precision = custom_precision.get(name, TestCuda.precision)
for inplace in (True, False):
if inplace and no_inplace:

This comment was marked as off-topic.

continue
if inplace:
name_inner = name + '_'
else:
Expand Down