12 changes: 11 additions & 1 deletion aten/src/TH/generic/THTensorMath.c
@@ -4022,6 +4022,9 @@ void THTensor_(norm)(THTensor *r_, THTensor *t, real value, int dimension, int k
} else if (value == 3) {
DIM_REDUCE(sum += TH_MATH_NAME(fabs)(t_data[i*t_stride] * t_data[i*t_stride] * t_data[i*t_stride]),
*r__data = TH_MATH_NAME(pow)(sum, 1.0/3));
} else if (value == INFINITY) {
DIM_REDUCE(sum = THMax(sum, TH_MATH_NAME(fabs)(t_data[i*t_stride])),
*r__data = sum);
} else {
DIM_REDUCE(sum += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(t_data[i*t_stride]), value),
*r__data = TH_MATH_NAME(pow)(sum, 1.0/value));
@@ -4048,6 +4051,9 @@ accreal THTensor_(normall)(THTensor *tensor, real value)
} else if(value == 3) {
TH_TENSOR_APPLY(real, tensor, accreal z = *tensor_data; sum += std::abs(z*z*z););
return TH_MATH_NAME(pow)(sum, 1.0/3);
} else if(value == INFINITY) {
TH_TENSOR_APPLY(real, tensor, sum = THMax(sum, TH_MATH_NAME(fabs)(*tensor_data)););
return sum;
} else {
TH_TENSOR_APPLY(real, tensor, sum += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(*tensor_data), value););
return TH_MATH_NAME(pow)(sum, 1.0/value);
@@ -4081,11 +4087,15 @@ void THTensor_(renorm)(THTensor *res, THTensor *src, real value, int dimension,
TH_TENSOR_APPLY(real, rowS, norm += fabs(*rowS_data););
} else if (value == 2) {
TH_TENSOR_APPLY(real, rowS, accreal z = *rowS_data; norm += z*z;);
} else if (value == INFINITY) {
TH_TENSOR_APPLY(real, rowS, norm = THMax(norm, TH_MATH_NAME(fabs)(*rowS_data)););
} else {
TH_TENSOR_APPLY(real, rowS, norm += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(*rowS_data), value););
}

norm = pow(norm, 1/value);
if (value != INFINITY) {
norm = pow(norm, 1/value);
}

if (norm > maxnorm)
{
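For reference, here is a minimal Python sketch (not part of the PR) of what the two CPU branches above compute; `torch.norm` with `float('inf')` is assumed to route to the new INFINITY branch.

```python
import torch

x = torch.tensor([1.0, -3.0, 2.0])

# Finite p: pow-sum-pow, as in the generic DIM_REDUCE branch.
p = 3.0
p_norm = x.abs().pow(p).sum().pow(1.0 / p)      # (1 + 27 + 8) ** (1/3) ≈ 3.30

# p == inf: a running max of |x|; note that renorm now skips the final
# pow(norm, 1/value) for this case, since max|x| already is the norm.
inf_norm = x.abs().max()                        # 3.0

print(p_norm.item(), inf_norm.item())
print(torch.norm(x, float('inf')).item())       # 3.0 with this change applied
```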
81 changes: 63 additions & 18 deletions aten/src/THC/THCTensorMathReduce.cuh
@@ -116,6 +116,23 @@ struct ReduceMax {
}
};

template <typename InT, typename AccT>
struct ReduceMaxTo {
inline __device__ AccT operator()(InT a, InT b) const {
return ScalarConvert<InT, AccT>::to(THCNumerics<InT>::gt(a, b) ? a : b);
}
};

#ifdef CUDA_HALF_TENSOR
template <>
struct ReduceMaxTo<half, float> {
inline __device__ float operator()(float a, half b) const {
float b_f = __half2float(b);
return (THCNumerics<float>::gt(a, b_f) ? a : b_f);
}
};
#endif // CUDA_HALF_TENSOR

struct LogicalAll {
inline __device__ unsigned char operator()(unsigned char x,
unsigned char y) const {
@@ -130,6 +147,11 @@ struct LogicalAny {
}
};

template<typename Real>
inline __device__ Real THCMax(const Real a, const Real b) {
return THCNumerics<Real>::gt(a, b) ? a : b;
}

template<typename Real>
__global__ void THCTensor_kernel_renorm(Real *data, const Real value, const ptrdiff_t size, const Real maxnorm)
{
@@ -140,27 +162,50 @@ __global__ void THCTensor_kernel_renorm(Real *data, const Real value, const ptrd
Real *row = data + size*bx;

buffer[tx] = ScalarConvert<int, Real>::to(0);
Real norm;

// get norm of axis
for (ptrdiff_t i=tx; i<size; i+=step)
{
buffer[tx] = THCNumerics<Real>::add(
buffer[tx],
THCNumerics<Real>::pow(
THCNumerics<Real>::abs(row[i]),
value)
);
}
// add (reduce)
for (unsigned int stride = blockDim.x >> 1; stride > 0; stride >>= 1)
{
if (THCNumerics<Real>::eq(value, ScalarConvert<float, Real>::to(INFINITY))) {
// get norm of axis
for (ptrdiff_t i=tx; i<size; i+=step)
{
buffer[tx] = THCMax<Real>(
buffer[tx],
THCNumerics<Real>::abs(row[i])
);
}
// add (reduce)
for (unsigned int stride = blockDim.x >> 1; stride > 0; stride >>= 1)
{
__syncthreads();
if (tx < stride)
buffer[tx] = THCMax<Real>(buffer[tx], buffer[tx+stride]);
}
// clip norms
__syncthreads();
if (tx < stride)
buffer[tx] = THCNumerics<Real>::add(buffer[tx], buffer[tx+stride]);
norm = buffer[0];
} else {
// get norm of axis
for (ptrdiff_t i=tx; i<size; i+=step)
{
buffer[tx] = THCNumerics<Real>::add(
buffer[tx],
THCNumerics<Real>::pow(
THCNumerics<Real>::abs(row[i]),
value)
);
}
// add (reduce)
for (unsigned int stride = blockDim.x >> 1; stride > 0; stride >>= 1)
{
__syncthreads();
if (tx < stride)
buffer[tx] = THCNumerics<Real>::add(buffer[tx], buffer[tx+stride]);
}
// clip norms
__syncthreads();
norm = THCNumerics<Real>::pow(buffer[0], THCNumerics<Real>::cinv(value));
}
// clip norms
__syncthreads();
Real norm = THCNumerics<Real>::pow(buffer[0], THCNumerics<Real>::cinv(value));

if (THCNumerics<Real>::gt(norm, maxnorm))
{
norm = THCNumerics<Real>::div(
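The restructured kernel keeps the same shared-memory tree reduction in both branches; only the combine op (max vs. add) and the finalize step (identity vs. `pow(., 1/value)`) differ. A rough host-side sketch of that per-row logic, for illustration only:

```python
import numpy as np

def row_norm(row, value):
    # Mirrors the two branches of THCTensor_kernel_renorm above.
    if value == float('inf'):
        # inf-norm: reduce with max(|.|); no final pow(., 1/value).
        return np.max(np.abs(row))
    # Finite p: reduce with sum(|.|**p), then finish with pow(., 1/p).
    return np.sum(np.abs(row) ** value) ** (1.0 / value)
```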
11 changes: 11 additions & 0 deletions aten/src/THC/generic/THCTensorMathReduce.cu
@@ -182,6 +182,10 @@ THCTensor_(norm)(THCState *state, THCTensor* self, THCTensor* src, real value, i
ScalarConvert<float, accreal>::to(0.0), dimension, keepdim);
THCTensor_(pow)(state, self, self, ScalarConvert<float, real>::to(0.5));

} else if (THCNumerics<real>::eq(value, ScalarConvert<float, real>::to(INFINITY))) {
THC_reduceDim(state, self, src,
TensorNormOp<real, 1>(value), ReduceMaxTo<real, accreal>(), ReduceMax<accreal>(),
ScalarConvert<float, accreal>::to(0.0), dimension, keepdim);
} else {
THC_reduceDim(state, self, src,
TensorNormOp<real, -1>(value), ReduceAdd<real, accreal>(), ReduceAdd<accreal, accreal>(),
@@ -220,6 +224,13 @@ THCTensor_(normall)(THCState *state, THCTensor *self, real value)
ScalarConvert<float, accreal>::to(0.0f),
&result, 0);
result = THCNumerics<accreal>::sqrt(result);
} else if (THCNumerics<real>::eq(value, ScalarConvert<float, real>::to(INFINITY))) {
THC_reduceAll(state, self,
TensorNormOp<real, 1>(value),
ReduceMaxTo<real, accreal>(),
ReduceMax<accreal>(),
ScalarConvert<float, accreal>::to(0.0f),
&result, 0);
} else {
THC_reduceAll(state, self,
TensorNormOp<real, -1>(value),
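These dispatch branches select a max-based reduction when `value` is infinite, for both the per-dimension and the full reductions. A hedged sketch of the user-facing behavior this enables on CUDA (assuming a CUDA device is available):

```python
import torch

x = torch.randn(4, 6, device='cuda')
# Per-row infinity norm, reduced along dim 1.
row_inf = x.norm(float('inf'), 1)
# Should match an explicit max of absolute values.
assert torch.allclose(row_inf, x.abs().max(1)[0])
```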
2 changes: 2 additions & 0 deletions test/test_autograd.py
@@ -2531,6 +2531,7 @@ class dont_convert(tuple):
('std', (S,), (0, True, True), 'keepdim_dim_1d', [0]),
('renorm', (S, S, S), (2, 1, 0.5), 'dim', [1]),
('renorm', (S, S, S), (1, 2, 3), 'norm_1'),
('renorm', (S, S, S), (float('inf'), 2, 0.5), 'norm_inf'),
('repeat', (S,), (2,), 'single_number'),
('repeat', (), (2, 3), 'scalar'),
('repeat', (2, 2), (3, 2)),
@@ -2619,6 +2620,7 @@ class dont_convert(tuple):
('norm', (S, S), (0.5,), '0_5'),
('norm', (S, S), (1,), '1'),
('norm', (S, S), (3,), '3'),
('norm', (S, S), (float('inf'),), 'inf'),
('norm', (S, S), (-1,), 'neg_1'),
('norm', (S, S), (-0.5,), 'neg_0_5'),
('norm', (S, S), (-1.5,), 'neg_1_5'),
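The new `method_tests` entries exercise the inf-norm paths through the standard autograd gradient checks; conceptually they boil down to something like the following hand-rolled sketch (not the actual harness):

```python
import torch
from torch.autograd import gradcheck

x = torch.randn(3, 4, dtype=torch.double, requires_grad=True)
y = torch.randn(2, 3, 4, dtype=torch.double, requires_grad=True)
assert gradcheck(lambda t: t.norm(float('inf')), (x,))
assert gradcheck(lambda t: t.renorm(float('inf'), 2, 0.5), (y,))
```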
44 changes: 44 additions & 0 deletions test/test_torch.py
@@ -555,6 +555,33 @@ def test_max(self):
def test_min(self):
self._testSelection(torch.min, min)

@staticmethod
def _test_norm(self, device):
# full reduction
x = torch.randn(5, device=device)
xn = x.cpu().numpy()
for p in [0, 1, 2, 3, 4, float('inf')]:
res = x.norm(p).item()
expected = np.linalg.norm(xn, p)
self.assertEqual(res, expected, "full reduction failed for {}-norm".format(p))
# one dimension
x = torch.randn(5, 5, device=device)
xn = x.cpu().numpy()
for p in [0, 1, 2, 3, 4, float('inf')]:
res = x.norm(p, 1).cpu().numpy()
expected = np.linalg.norm(xn, p, 1)
self.assertEqual(res.shape, expected.shape)
self.assertTrue(np.allclose(res, expected), "dim reduction failed for {}-norm".format(p))

@unittest.skipIf(not TEST_NUMPY, "Numpy not found")
def test_norm(self):
self._test_norm(self, device='cpu')

@unittest.skipIf(not TEST_NUMPY, "Numpy not found")
@unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
def test_norm_cuda(self):
self._test_norm(self, device='cuda')

def test_dim_reduction_uint8_overflow(self):
example = [[-1, 2, 1], [5, 3, 6]]
x = torch.tensor(example, dtype=torch.uint8)
@@ -2023,6 +2050,23 @@ def renorm(matrix, value, dim, max_norm):
self.assertEqual(m3, m2)
self.assertEqual(m3.norm(2, 0), m2.norm(2, 0))

@staticmethod
def _test_renorm_ps(self, device):
# full reduction
x = torch.randn(5, 5)
xn = x.numpy()
for p in [1, 2, 3, 4, float('inf')]:
res = x.renorm(p, 1, 1)
expected = x / x.norm(p, 0, keepdim=True).clamp(min=1)
self.assertEqual(res.numpy(), expected.numpy(), "renorm failed for {}-norm".format(p))

def test_renorm_ps(self):
self._test_renorm_ps(self, device='cpu')

@unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
def test_renorm_ps_cuda(self):
self._test_renorm_ps(self, device='cuda')

@staticmethod
def _test_multinomial(self, type):
def make_prob_dist(shape, is_contiguous):
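`_test_renorm_ps` relies on the identity that `renorm(p, dim, maxnorm)` rescales each slice along `dim` whose p-norm (taken over the remaining dimensions) exceeds `maxnorm`. A standalone version of that reference check, with `maxnorm = 1`:

```python
import torch

x = torch.randn(5, 5)
p = float('inf')
# Slices along dim 1 have their norms computed over dim 0, so renorm(p, 1, 1)
# is equivalent to dividing by the per-column norm clamped to at least 1.
expected = x / x.norm(p, 0, keepdim=True).clamp(min=1)
assert torch.allclose(x.renorm(p, 1, 1), expected)
```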
3 changes: 3 additions & 0 deletions tools/autograd/templates/Functions.cpp
@@ -86,6 +86,9 @@ Tensor norm_backward(const Tensor & grad, const Tensor & self, const Scalar & p_
} else if (p == 2.0) {
self_scaled = self;
scale_v = grad / norm;
} else if (p == INFINITY) {
self_scaled = self.sign() * (self.abs() == norm).toType(self.type());
scale_v = grad.clone();
} else {
self_scaled = self * self.abs().pow(p - 2);
scale_v = grad / norm.pow(p - 1);
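The new INFINITY branch of `norm_backward` implements the (sub)gradient of the max-abs norm: the incoming gradient flows, with the element's sign, to every entry whose absolute value attains the norm, and is zero elsewhere. A quick numeric check of that behavior (illustrative; a single maximizer, so there are no ties):

```python
import torch

x = torch.tensor([0.5, -2.0, 1.5], requires_grad=True)
x.norm(float('inf')).backward()
# d||x||_inf / dx = sign(x) * (|x| == max|x|); a subgradient when there are ties.
print(x.grad)   # tensor([ 0., -1.,  0.])
```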