
Commit 5adba33

f0k authored and facebook-github-bot committed
Use integer floor division for pooling shape computation (#22304)
Summary: Fixes #21935 by using the integer floor division that was introduced for convolution shapes in #9640. Without this fix, the pooling operators can produce a 1-element output in cases where they shouldn't.

Disclaimer: I couldn't properly test it locally (it's not picking up the modified version for some reason). I'm marking this WIP until I've checked what the CI tools say.

Pull Request resolved: #22304
Differential Revision: D16181955
Pulled By: ezyang
fbshipit-source-id: a2405372753572548b40616d1206848b527c8121
1 parent 3328245 commit 5adba33
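To see why floor division matters here: with input size 4, kernel size 3, stride 2, dilation 2, and no padding (the configuration the new test exercises), the numerator of the pooling shape formula is 4 + 0 - 2*(3 - 1) - 1 = -1. C++'s `/` truncates toward zero, so -1 / 2 becomes 0 and the formula reports a spurious 1-element output; flooring instead gives -1 and hence 0 outputs, which lets the size check reject the configuration. A standalone sketch of that arithmetic (variable names mirror the patch; the program itself is illustrative, not from the PR):

```cpp
#include <cmath>
#include <iostream>

int main() {
  long long inputSize = 4, kernelSize = 3, pad = 0, stride = 2, dilation = 2;
  // Numerator of the pooling shape formula: 4 + 0 - 2*(3 - 1) - 1 = -1
  long long num = inputSize + 2 * pad - dilation * (kernelSize - 1) - 1;

  // Old behavior: C++ '/' truncates toward zero, so -1 / 2 == 0.
  long long truncated = num / stride + 1;  // 0 + 1 = 1 output (wrong)

  // Fixed behavior: floor division, so floor(-1 / 2) == -1.
  long long floored = static_cast<long long>(
      std::floor(static_cast<double>(num) / stride)) + 1;  // -1 + 1 = 0 outputs

  std::cout << "truncating division: " << truncated << " output(s)\n";  // 1
  std::cout << "floor division:      " << floored << " output(s)\n";    // 0
  return 0;
}
```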

File tree

7 files changed: +45 -53 lines changed


aten/src/ATen/native/Pool.h

Lines changed: 17 additions & 5 deletions
```diff
@@ -1,6 +1,7 @@
 #include <ATen/ATen.h>
 #include <ATen/Parallel.h>
 #include <ATen/NativeFunctions.h>
+#include <ATen/div_rtn.h>
 #include <tuple>
 
 #pragma once
@@ -21,18 +22,29 @@ safe_downcast(src_t v)
 }
 
 template<typename T>
-static inline T pooling_output_shape(
-    T inputSize, T kernelSize, T pad, T stride, T dilation, bool ceil_mode) {
-    T outputSize = ((inputSize + 2 * pad - dilation * (kernelSize - 1) - 1 + (ceil_mode ? stride - 1 : 0)) / stride + 1);
-    if (pad) {
+static inline T pooling_output_shape_pad_lr(
+        T inputSize, T kernelSize, T pad_l, T pad_r, T stride, T dilation,
+        bool ceil_mode) {
+    T outputSize = div_rtn<T>(
+        inputSize + pad_l + pad_r - dilation * (kernelSize - 1) - 1 +
+        (ceil_mode ? stride - 1 : 0), stride) + 1;
+    if (pad_l) {
         // ensure that the last pooling starts inside the image
         // needed to avoid problems in ceil mode
-        if ((outputSize - 1) * stride >= inputSize + pad)
+        if ((outputSize - 1) * stride >= inputSize + pad_l)
           --outputSize;
     }
     return outputSize;
 }
 
+template<typename T>
+static inline T pooling_output_shape(
+    T inputSize, T kernelSize, T pad, T stride, T dilation, bool ceil_mode) {
+    return pooling_output_shape_pad_lr(
+        inputSize, kernelSize, pad, pad, stride, dilation, ceil_mode);
+}
+
+
 // AveragePool2d/DilatedMaxPool2d (forward)
 static inline void
 pool2d_shape_check(
```
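The helper the new code relies on, `div_rtn` ("round to negative"), floors the quotient instead of truncating it. A minimal integer-only sketch of that behavior, written to match what `<ATen/div_rtn.h>` provides rather than quoting it:

```cpp
// Floor division for signed integers: round the quotient toward negative
// infinity, unlike C++'s built-in '/', which truncates toward zero.
template <typename T>
static inline T div_rtn(T x, T y) {
  T q = x / y;  // truncated quotient
  T r = x % y;  // remainder, carries the sign of x
  // A nonzero remainder whose sign differs from the divisor's means the
  // exact quotient is negative and truncation rounded it up; step down.
  if ((r != 0) && ((r < 0) != (y < 0))) {
    --q;
  }
  return q;
}
```

With this helper, `div_rtn<long long>(-1, 2)` returns -1 where `-1 / 2` returns 0, which is exactly the difference the pooling shape computation needs.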

aten/src/ATen/native/mkldnn/Utils.cpp

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,5 +1,5 @@
 #include <ATen/native/mkldnn/Utils.h>
-#include <THNN/generic/pooling_shape.h>
+#include <ATen/native/Pool.h>
 
 namespace at { namespace native {
 
```

aten/src/ATen/native/quantized/cpu/qpool.cpp

Lines changed: 1 addition & 1 deletion
```diff
@@ -4,7 +4,7 @@
 #include <ATen/native/TensorIterator.h>
 #include <ATen/native/cpu/Loops.h>
 #include <ATen/quantized/Quantizer.h>
-#include <THNN/generic/pooling_shape.h>
+#include <ATen/native/Pool.h>
 
 #include <algorithm>
 #include <vector>
```

aten/src/THCUNN/generic/pooling_shape.h

Lines changed: 0 additions & 18 deletions
This file was deleted.

aten/src/THNN/generic/pooling_shape.h

Lines changed: 0 additions & 27 deletions
This file was deleted.

test/test_nn.py

Lines changed: 25 additions & 0 deletions
```diff
@@ -4028,6 +4028,31 @@ def test_pool_large_size_cuda(self, dtype=torch.float):
     def test_pool_large_size(self, dtype=torch.float):
         self._test_pool_large_size(self, device="cpu")
 
+    @staticmethod
+    def _test_pool_invalid_size(self, device, dtype=torch.float):
+        for op in ('max', 'avg'):
+            for num_dim in [1, 2, 3]:
+                fn_name = '{}_pool{}d'.format(op, num_dim)
+                fn = getattr(F, fn_name)
+                # use a configuration that gives zero outputs only
+                # when doing a correct floor division by the stride
+                x = torch.ones([1, 1] + num_dim * [4],
+                               device=device, dtype=dtype)
+                with self.assertRaisesRegex(RuntimeError, r"too small|smaller than"):
+                    try:
+                        res = fn(x, 3, stride=2, padding=0, dilation=2)
+                    except TypeError:
+                        # some implementations do not support dilation
+                        res = fn(x, 6, stride=2, padding=0)
+
+    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    @repeat_test_for_types(ALL_TENSORTYPES)
+    def test_pool_invalid_size_cuda(self, dtype=torch.float):
+        self._test_pool_invalid_size(self, device="cuda", dtype=dtype)
+
+    def test_pool_invalid_size(self, dtype=torch.float):
+        self._test_pool_invalid_size(self, device="cpu")
+
     def _test_scatter(self, tensor):
         x = tensor.detach().requires_grad_()
         result = dp.scatter(x, (0, 1))
```
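The expectation encoded by the new Python test, sketched from the C++ side against libtorch. Treat this as an illustration rather than part of the patch: the `torch::max_pool2d` overload and `c10::Error` are standard ATen surface, but the snippet is not taken from the PR.

```cpp
#include <torch/torch.h>
#include <iostream>

int main() {
  // 4x4 input, kernel 3 with dilation 2: the effective kernel extent is 5,
  // so no valid pooling window fits and the shape check should now throw
  // instead of silently producing a 1-element output.
  auto x = torch::ones({1, 1, 4, 4});
  try {
    auto y = torch::max_pool2d(x, /*kernel_size=*/{3, 3}, /*stride=*/{2, 2},
                               /*padding=*/{0, 0}, /*dilation=*/{2, 2});
    std::cout << "unexpected: got output of size " << y.sizes() << "\n";
  } catch (const c10::Error& e) {
    std::cout << "raised as expected: output size too small\n";
  }
  return 0;
}
```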

test/test_quantized.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -72,7 +72,7 @@ def _pool_output_shape(self, input_size, kernel_size, padding, stride,
                            dilation, ceiling_mode=False):
         output_size = (
             (input_size + 2 * padding - dilation * (kernel_size - 1) - 1
-             + (stride - 1 if ceiling_mode else 0)) / stride + 1)
+             + (stride - 1 if ceiling_mode else 0)) // stride + 1)
         if (padding > 0 and
                 ((output_size - 1) * stride >= input_size + padding)):
             output_size += 1
```
