
Commit f98b778

t-vi authored and apaszke committed
Fix forward and backward for norm/renorm with infty norm (fixes #6817) (#6969)
1 parent 24d0566 commit f98b778

File tree

aten/src/TH/generic/THTensorMath.c
aten/src/THC/THCTensorMathReduce.cuh
aten/src/THC/generic/THCTensorMathReduce.cu
test/test_autograd.py
test/test_torch.py
tools/autograd/templates/Functions.cpp

6 files changed: +134 −19 lines changed


aten/src/TH/generic/THTensorMath.c

Lines changed: 11 additions & 1 deletion
@@ -4252,6 +4252,9 @@ void THTensor_(norm)(THTensor *r_, THTensor *t, real value, int dimension, int k
   } else if (value == 3) {
     DIM_REDUCE(sum += TH_MATH_NAME(fabs)(t_data[i*t_stride] * t_data[i*t_stride] * t_data[i*t_stride]),
                *r__data = TH_MATH_NAME(pow)(sum, 1.0/3));
+  } else if (value == INFINITY) {
+    DIM_REDUCE(sum = THMax(sum, TH_MATH_NAME(fabs)(t_data[i*t_stride])),
+               *r__data = sum);
   } else {
     DIM_REDUCE(sum += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(t_data[i*t_stride]), value),
                *r__data = TH_MATH_NAME(pow)(sum, 1.0/value));
@@ -4278,6 +4281,9 @@ accreal THTensor_(normall)(THTensor *tensor, real value)
   } else if(value == 3) {
     TH_TENSOR_APPLY(real, tensor, accreal z = *tensor_data; sum += std::abs(z*z*z););
     return TH_MATH_NAME(pow)(sum, 1.0/3);
+  } else if(value == INFINITY) {
+    TH_TENSOR_APPLY(real, tensor, sum = THMax(sum, TH_MATH_NAME(fabs)(*tensor_data)););
+    return sum;
   } else {
     TH_TENSOR_APPLY(real, tensor, sum += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(*tensor_data), value););
     return TH_MATH_NAME(pow)(sum, 1.0/value);
@@ -4311,11 +4317,15 @@ void THTensor_(renorm)(THTensor *res, THTensor *src, real value, int dimension,
       TH_TENSOR_APPLY(real, rowS, norm += fabs(*rowS_data););
     } else if (value == 2) {
       TH_TENSOR_APPLY(real, rowS, accreal z = *rowS_data; norm += z*z;);
+    } else if (value == INFINITY) {
+      TH_TENSOR_APPLY(real, rowS, norm = THMax(norm, TH_MATH_NAME(fabs)(*rowS_data)););
     } else {
       TH_TENSOR_APPLY(real, rowS, norm += TH_MATH_NAME(pow)(TH_MATH_NAME(fabs)(*rowS_data), value););
     }
 
-    norm = pow(norm, 1/value);
+    if (value != INFINITY) {
+      norm = pow(norm, 1/value);
+    }
 
     if (norm > maxnorm)
     {

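For intuition: the new branches compute the inf-norm as a running maximum of absolute values, and since max has no final 1/p root, the renorm path now skips pow(norm, 1/value). A minimal NumPy sketch of the intended semantics (illustrative only, not part of the patch):

import numpy as np

x = np.array([1.0, -7.0, 3.0])
inf_norm = np.abs(x).max()                    # max |x_i|; no 1/p root applied
assert inf_norm == np.linalg.norm(x, np.inf)  # matches NumPy's vector inf-norm
# limiting behaviour: (sum |x_i|^p)^(1/p) -> max |x_i| as p grows
assert abs((np.abs(x) ** 100).sum() ** (1.0 / 100) - inf_norm) < 1e-6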
aten/src/THC/THCTensorMathReduce.cuh

Lines changed: 63 additions & 18 deletions
@@ -116,6 +116,23 @@ struct ReduceMax {
   }
 };
 
+template <typename InT, typename AccT>
+struct ReduceMaxTo {
+  inline __device__ AccT operator()(InT a, InT b) const {
+    return ScalarConvert<InT, AccT>::to(THCNumerics<InT>::gt(a, b) ? a : b);
+  }
+};
+
+#ifdef CUDA_HALF_TENSOR
+template <>
+struct ReduceMaxTo<half, float> {
+  inline __device__ float operator()(float a, half b) const {
+    float b_f = __half2float(b);
+    return (THCNumerics<float>::gt(a, b_f) ? a : b_f);
+  }
+};
+#endif // CUDA_HALF_TENSOR
+
 struct LogicalAll {
   inline __device__ unsigned char operator()(unsigned char x,
                                              unsigned char y) const {
@@ -130,6 +147,11 @@ struct LogicalAny {
   }
 };
 
+template<typename Real>
+inline __device__ Real THCMax(const Real a, const Real b) {
+  return THCNumerics<Real>::gt(a, b) ? a : b;
+}
+
 template<typename Real>
 __global__ void THCTensor_kernel_renorm(Real *data, const Real value, const ptrdiff_t size, const Real maxnorm)
 {
@@ -140,27 +162,50 @@ __global__ void THCTensor_kernel_renorm(Real *data, const Real value, const ptrd
   Real *row = data + size*bx;
 
   buffer[tx] = ScalarConvert<int, Real>::to(0);
+  Real norm;
 
-  // get norm of axis
-  for (ptrdiff_t i=tx; i<size; i+=step)
-  {
-    buffer[tx] = THCNumerics<Real>::add(
-      buffer[tx],
-      THCNumerics<Real>::pow(
-        THCNumerics<Real>::abs(row[i]),
-        value)
-    );
-  }
-  // add (reduce)
-  for (unsigned int stride = blockDim.x >> 1; stride > 0; stride >>= 1)
-  {
+  if (THCNumerics<Real>::eq(value, ScalarConvert<float, Real>::to(INFINITY))) {
+    // get norm of axis
+    for (ptrdiff_t i=tx; i<size; i+=step)
+    {
+      buffer[tx] = THCMax<Real>(
+        buffer[tx],
+        THCNumerics<Real>::abs(row[i])
+      );
+    }
+    // add (reduce)
+    for (unsigned int stride = blockDim.x >> 1; stride > 0; stride >>= 1)
+    {
+      __syncthreads();
+      if (tx < stride)
+        buffer[tx] = THCMax<Real>(buffer[tx], buffer[tx+stride]);
+    }
+    // clip norms
     __syncthreads();
-    if (tx < stride)
-      buffer[tx] = THCNumerics<Real>::add(buffer[tx], buffer[tx+stride]);
+    norm = buffer[0];
+  } else {
+    // get norm of axis
+    for (ptrdiff_t i=tx; i<size; i+=step)
+    {
+      buffer[tx] = THCNumerics<Real>::add(
+        buffer[tx],
+        THCNumerics<Real>::pow(
+          THCNumerics<Real>::abs(row[i]),
+          value)
+      );
+    }
+    // add (reduce)
+    for (unsigned int stride = blockDim.x >> 1; stride > 0; stride >>= 1)
+    {
+      __syncthreads();
+      if (tx < stride)
+        buffer[tx] = THCNumerics<Real>::add(buffer[tx], buffer[tx+stride]);
+    }
+    // clip norms
+    __syncthreads();
+    norm = THCNumerics<Real>::pow(buffer[0], THCNumerics<Real>::cinv(value));
   }
-  // clip norms
-  __syncthreads();
-  Real norm = THCNumerics<Real>::pow(buffer[0], THCNumerics<Real>::cinv(value));
+
   if (THCNumerics<Real>::gt(norm, maxnorm))
   {
     norm = THCNumerics<Real>::div(

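The kernel now special-cases INFINITY: the same stride-halving shared-memory reduction runs with max in place of add, and the final pow(..., cinv(value)) is dropped. A single-threaded Python sketch of that tree reduction (illustrative; in the kernel each tx of the inner loop is a parallel thread, separated by __syncthreads()):

def tree_max(buf):
    # stride-halving reduction: buf[0] ends up holding the max of all
    # entries; len(buf) is a power of two, as blockDim.x is in the kernel
    stride = len(buf) // 2
    while stride > 0:
        for tx in range(stride):
            buf[tx] = max(buf[tx], buf[tx + stride])
        stride //= 2
    return buf[0]

assert tree_max([3.0, 9.0, 1.0, 4.0]) == 9.0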
aten/src/THC/generic/THCTensorMathReduce.cu

Lines changed: 11 additions & 0 deletions
@@ -182,6 +182,10 @@ THCTensor_(norm)(THCState *state, THCTensor* self, THCTensor* src, real value, i
                   ScalarConvert<float, accreal>::to(0.0), dimension, keepdim);
     THCTensor_(pow)(state, self, self, ScalarConvert<float, real>::to(0.5));
 
+  } else if (THCNumerics<real>::eq(value, ScalarConvert<float, real>::to(INFINITY))) {
+    THC_reduceDim(state, self, src,
+                  TensorNormOp<real, 1>(value), ReduceMaxTo<real, accreal>(), ReduceMax<accreal>(),
+                  ScalarConvert<float, accreal>::to(0.0), dimension, keepdim);
   } else {
     THC_reduceDim(state, self, src,
                   TensorNormOp<real, -1>(value), ReduceAdd<real, accreal>(), ReduceAdd<accreal, accreal>(),
@@ -220,6 +224,13 @@ THCTensor_(normall)(THCState *state, THCTensor *self, real value)
                   ScalarConvert<float, accreal>::to(0.0f),
                   &result, 0);
     result = THCNumerics<accreal>::sqrt(result);
+  } else if (THCNumerics<real>::eq(value, ScalarConvert<float, real>::to(INFINITY))) {
+    THC_reduceAll(state, self,
+                  TensorNormOp<real, 1>(value),
+                  ReduceMaxTo<real, accreal>(),
+                  ReduceMax<accreal>(),
+                  ScalarConvert<float, accreal>::to(0.0f),
+                  &result, 0);
   } else {
     THC_reduceAll(state, self,
                   TensorNormOp<real, -1>(value),

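With these dispatch branches, p = inf reaches the max-based reductions on the GPU as well. A short usage sketch (assumes a CUDA device is available):

import torch

x = torch.randn(5, 5).cuda()
full = x.norm(float('inf'))     # max |x_ij| over the whole tensor
rows = x.norm(float('inf'), 1)  # max |x_ij| along dim 1, one value per row
assert full.item() == x.abs().max().item()
assert (rows == x.abs().max(1)[0]).all()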
test/test_autograd.py

Lines changed: 2 additions & 0 deletions
@@ -2531,6 +2531,7 @@ class dont_convert(tuple):
     ('std', (S,), (0, True, True), 'keepdim_dim_1d', [0]),
     ('renorm', (S, S, S), (2, 1, 0.5), 'dim', [1]),
     ('renorm', (S, S, S), (1, 2, 3), 'norm_1'),
+    ('renorm', (S, S, S), (float('inf'), 2, 0.5), 'norm_inf'),
     ('repeat', (S,), (2,), 'single_number'),
     ('repeat', (), (2, 3), 'scalar'),
     ('repeat', (2, 2), (3, 2)),
@@ -2619,6 +2620,7 @@ class dont_convert(tuple):
     ('norm', (S, S), (0.5,), '0_5'),
     ('norm', (S, S), (1,), '1'),
     ('norm', (S, S), (3,), '3'),
+    ('norm', (S, S), (float('inf'),), 'inf'),
     ('norm', (S, S), (-1,), 'neg_1'),
     ('norm', (S, S), (-0.5,), 'neg_0_5'),
     ('norm', (S, S), (-1.5,), 'neg_1_5'),

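These table entries route the new inf cases through the autograd test harness. An equivalent standalone check, sketched here with the public gradcheck API (double precision, since gradcheck compares against finite differences):

import torch
from torch.autograd import gradcheck

x = torch.randn(5, 5, dtype=torch.float64, requires_grad=True)
assert gradcheck(lambda t: t.norm(float('inf')), (x,))

Ties in |x| would make the inf-norm non-differentiable, but a random input almost surely has a unique maximum, so the check is well posed.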
test/test_torch.py

Lines changed: 44 additions & 0 deletions
@@ -555,6 +555,33 @@ def test_max(self):
     def test_min(self):
         self._testSelection(torch.min, min)
 
+    @staticmethod
+    def _test_norm(self, device):
+        # full reduction
+        x = torch.randn(5, device=device)
+        xn = x.cpu().numpy()
+        for p in [0, 1, 2, 3, 4, float('inf')]:
+            res = x.norm(p).item()
+            expected = np.linalg.norm(xn, p)
+            self.assertEqual(res, expected, "full reduction failed for {}-norm".format(p))
+        # one dimension
+        x = torch.randn(5, 5, device=device)
+        xn = x.cpu().numpy()
+        for p in [0, 1, 2, 3, 4, float('inf')]:
+            res = x.norm(p, 1).cpu().numpy()
+            expected = np.linalg.norm(xn, p, 1)
+            self.assertEqual(res.shape, expected.shape)
+            self.assertTrue(np.allclose(res, expected), "dim reduction failed for {}-norm".format(p))
+
+    @unittest.skipIf(not TEST_NUMPY, "Numpy not found")
+    def test_norm(self):
+        self._test_norm(self, device='cpu')
+
+    @unittest.skipIf(not TEST_NUMPY, "Numpy not found")
+    @unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
+    def test_norm_cuda(self):
+        self._test_norm(self, device='cuda')
+
     def test_dim_reduction_uint8_overflow(self):
         example = [[-1, 2, 1], [5, 3, 6]]
         x = torch.tensor(example, dtype=torch.uint8)
@@ -2056,6 +2083,23 @@ def renorm(matrix, value, dim, max_norm):
         self.assertEqual(m3, m2)
         self.assertEqual(m3.norm(2, 0), m2.norm(2, 0))
 
+    @staticmethod
+    def _test_renorm_ps(self, device):
+        # full reduction
+        x = torch.randn(5, 5)
+        xn = x.numpy()
+        for p in [1, 2, 3, 4, float('inf')]:
+            res = x.renorm(p, 1, 1)
+            expected = x / x.norm(p, 0, keepdim=True).clamp(min=1)
+            self.assertEqual(res.numpy(), expected.numpy(), "renorm failed for {}-norm".format(p))
+
+    def test_renorm_ps(self):
+        self._test_renorm_ps(self, device='cpu')
+
+    @unittest.skipIf(not torch.cuda.is_available(), 'no CUDA')
+    def test_renorm_ps_cuda(self):
+        self._test_renorm_ps(self, device='cuda')
+
     @staticmethod
     def _test_multinomial(self, type):
         def make_prob_dist(shape, is_contiguous):

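The expected value in _test_renorm_ps follows from renorm's contract: each sub-tensor along the given dim whose p-norm exceeds maxnorm = 1 is rescaled to norm 1, i.e. divided by max(norm, 1), which is exactly x / x.norm(p, 0, keepdim=True).clamp(min=1). A small worked example (output approximate, since the kernel divides by the norm plus a tiny epsilon):

import torch

x = torch.tensor([[3.0, 0.2],
                  [-4.0, 0.1]])
# column 0 has inf-norm 4 > 1, so it is scaled by 1/4;
# column 1 has inf-norm 0.2 <= 1, so it is left unchanged
out = x.renorm(float('inf'), 1, 1)
print(out)  # approximately [[0.75, 0.2], [-1.0, 0.1]]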
tools/autograd/templates/Functions.cpp

Lines changed: 3 additions & 0 deletions
@@ -86,6 +86,9 @@ Tensor norm_backward(const Tensor & grad, const Tensor & self, const Scalar & p_
   } else if (p == 2.0) {
     self_scaled = self;
     scale_v = grad / norm;
+  } else if (p == INFINITY) {
+    self_scaled = self.sign() * (self.abs() == norm).toType(self.type());
+    scale_v = grad.clone();
   } else {
     self_scaled = self * self.abs().pow(p - 2);
     scale_v = grad / norm.pow(p - 1);

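The new branch implements the subgradient of max_i |x_i|: the gradient is sign(x_i) at the entries attaining the maximum and zero elsewhere, and the incoming grad passes through unscaled (hence grad.clone()). A quick numeric check (illustrative):

import torch

x = torch.tensor([1.0, -7.0, 3.0], requires_grad=True)
x.norm(float('inf')).backward()
print(x.grad)  # tensor([ 0., -1.,  0.]): sign at the argmax, zero elsewhere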