Commit 5deb60e

Use integer floor division for pooling shape computation
1 parent f13fadd

4 files changed, +39 -4 lines changed

aten/src/ATen/native/Pool.h

Lines changed: 4 additions & 1 deletion
@@ -1,6 +1,7 @@
 #include <ATen/ATen.h>
 #include <ATen/Parallel.h>
 #include <ATen/NativeFunctions.h>
+#include <ATen/div_rtn.h>
 #include <tuple>
 
 #pragma once
@@ -23,7 +24,9 @@ safe_downcast(src_t v)
 template<typename T>
 static inline T pooling_output_shape(
     T inputSize, T kernelSize, T pad, T stride, T dilation, bool ceil_mode) {
-    T outputSize = ((inputSize + 2 * pad - dilation * (kernelSize - 1) - 1 + (ceil_mode ? stride - 1 : 0)) / stride + 1);
+    T outputSize = div_rtn<T>(
+        inputSize + 2 * pad - dilation * (kernelSize - 1) - 1 +
+        (ceil_mode ? stride - 1 : 0), stride) + 1;
     if (pad) {
         // ensure that the last pooling starts inside the image
         // needed to avoid problems in ceil mode
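The crux of the change: C++'s built-in `/` on integers truncates toward zero, while div_rtn rounds toward negative infinity (floor). The two agree whenever the numerator is non-negative, so valid pooling configurations are unaffected; they differ exactly when the numerator goes negative, i.e. when the dilated kernel does not fit into the padded input even once. A minimal sketch of the semantics (the real helper lives in ATen/div_rtn.h; this standalone copy is for illustration only):

#include <cstdio>

// Integer division rounding toward negative infinity (floor),
// unlike C++'s `/`, which rounds toward zero.
template <typename T>
static inline T div_rtn(T x, T y) {
  T q = x / y;  // truncated quotient
  T r = x % y;  // remainder takes the sign of x
  if ((r != 0) && ((r < 0) != (y < 0))) --q;  // step down to the floor
  return q;
}

int main() {
  // A negative numerator divided by a positive stride is where they split:
  printf("%d\n", -1 / 2);          // 0  (truncation)
  printf("%d\n", div_rtn(-1, 2));  // -1 (floor)
  return 0;
}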

aten/src/THCUNN/generic/pooling_shape.h

Lines changed: 5 additions & 1 deletion
@@ -1,11 +1,15 @@
 #ifndef THCUNN_POOLING_SHAPE_H
 #define THCUNN_POOLING_SHAPE_H
 
+#include <ATen/div_rtn.h>
+
 template<typename T>
 __host__ __forceinline__
 static T pooling_output_shape(
     T inputSize, T kernelSize, T pad, T stride, T dilation, bool ceil_mode) {
-    T outputSize = ((inputSize + 2 * pad - dilation * (kernelSize - 1) - 1 + (ceil_mode ? stride - 1 : 0)) / stride + 1);
+    T outputSize = div_rtn<T>(
+        inputSize + 2 * pad - dilation * (kernelSize - 1) - 1 +
+        (ceil_mode ? stride - 1 : 0), stride) + 1;
     if (pad) {
         // ensure that the last pooling starts inside the image
         // needed to avoid problems in ceil mode

aten/src/THNN/generic/pooling_shape.h

Lines changed: 5 additions & 2 deletions
@@ -1,13 +1,16 @@
 #ifndef THNN_POOLING_SHAPE_H
 #define THNN_POOLING_SHAPE_H
 
+#include <ATen/div_rtn.h>
+
 template<typename T>
 static inline T pooling_output_shape_pad_lr(
     T inputSize, T kernelSize, T pad_l, T pad_r, T stride, T dilation,
     bool ceil_mode
 ) {
-    T outputSize = ((inputSize + pad_l + pad_r - dilation * (kernelSize - 1)
-        - 1 + (ceil_mode ? stride - 1 : 0)) / stride + 1);
+    T outputSize = div_rtn<T>(
+        inputSize + pad_l + pad_r - dilation * (kernelSize - 1) - 1 +
+        (ceil_mode ? stride - 1 : 0), stride) + 1;
     if (pad_l) {
         // ensure that the last pooling starts inside the image
         // needed to avoid problems in ceil mode
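Worked through with numbers: the numerator inputSize + pad_l + pad_r - dilation * (kernelSize - 1) - 1 goes negative exactly when the dilated kernel extent exceeds the padded input. For inputSize = 4, kernelSize = 3, dilation = 2, no padding, stride = 2 (the configuration the new test below uses), it evaluates to 4 - 2 * 2 - 1 = -1. Truncating division then yields -1 / 2 = 0 and outputSize = 1, a bogus positive size that sails past the shape checks; floor division yields div_rtn(-1, 2) = -1 and outputSize = 0, which the callers reject with the RuntimeError the new test expects.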

test/test_nn.py

Lines changed: 25 additions & 0 deletions
@@ -3902,6 +3902,31 @@ def test_pool_large_size_cuda(self, dtype=torch.float):
     def test_pool_large_size(self, dtype=torch.float):
         self._test_pool_large_size(self, device="cpu")
 
+    @staticmethod
+    def _test_pool_invalid_size(self, device, dtype=torch.float):
+        for op in ('max', 'avg'):
+            for num_dim in [1, 2, 3]:
+                fn_name = '{}_pool{}d'.format(op, num_dim)
+                fn = getattr(F, fn_name)
+                # use a configuration that gives zero outputs only
+                # when doing a correct floor division by the stride
+                x = torch.ones([1, 1] + num_dim * [4],
+                               device=device, dtype=dtype)
+                with self.assertRaisesRegex(RuntimeError, r"too small|smaller than"):
+                    try:
+                        res = fn(x, 3, stride=2, padding=0, dilation=2)
+                    except TypeError:
+                        # some implementations do not support dilation
+                        res = fn(x, 6, stride=2, padding=0)
+
+    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    @repeat_test_for_types(ALL_TENSORTYPES)
+    def test_pool_invalid_size_cuda(self, dtype=torch.float):
+        self._test_pool_invalid_size(self, device="cuda", dtype=dtype)
+
+    def test_pool_invalid_size(self, dtype=torch.float):
+        self._test_pool_invalid_size(self, device="cpu")
+
     def _test_scatter(self, tensor):
         x = tensor.detach().requires_grad_()
         result = dp.scatter(x, (0, 1))
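Two details of the test worth noting: the regex r"too small|smaller than" presumably covers the differing wordings of the size-check errors across backends, and the TypeError fallback handles pooling functions that take no dilation parameter (e.g. avg_pool2d); for those, kernel size 6 on a size-4 input gives a numerator of 4 - 5 - 1 = -2 and an output size of 0 under either division, so they still exercise the invalid-size path.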
