Commit 5deb60e

Use integer floor division for pooling shape computation
1 parent f13fadd

4 files changed, +39 -4 lines changed

aten/src/ATen/native/Pool.h

Lines changed: 4 additions & 1 deletion
@@ -1,6 +1,7 @@
 #include <ATen/ATen.h>
 #include <ATen/Parallel.h>
 #include <ATen/NativeFunctions.h>
+#include <ATen/div_rtn.h>
 #include <tuple>
 
 #pragma once
@@ -23,7 +24,9 @@ safe_downcast(src_t v)
 template<typename T>
 static inline T pooling_output_shape(
     T inputSize, T kernelSize, T pad, T stride, T dilation, bool ceil_mode) {
-    T outputSize = ((inputSize + 2 * pad - dilation * (kernelSize - 1) - 1 + (ceil_mode ? stride - 1 : 0)) / stride + 1);
+    T outputSize = div_rtn<T>(
+        inputSize + 2 * pad - dilation * (kernelSize - 1) - 1 +
+        (ceil_mode ? stride - 1 : 0), stride) + 1;
     if (pad) {
         // ensure that the last pooling starts inside the image
         // needed to avoid problems in ceil mode
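The crux of the change: C++'s built-in `/` on integers truncates toward zero, while div_rtn rounds toward negative infinity (floor). The two agree whenever the numerator is non-negative, so valid pooling configurations are unaffected; they differ exactly when the numerator goes negative, i.e. when the dilated kernel does not fit into the padded input even once. A minimal sketch of the semantics (the real helper lives in ATen/div_rtn.h; this standalone copy is for illustration only):

#include <cstdio>

// Integer division rounding toward negative infinity (floor),
// unlike C++'s `/`, which rounds toward zero.
template <typename T>
static inline T div_rtn(T x, T y) {
  T q = x / y;  // truncated quotient
  T r = x % y;  // remainder takes the sign of x
  if ((r != 0) && ((r < 0) != (y < 0))) --q;  // step down to the floor
  return q;
}

int main() {
  // A negative numerator divided by a positive stride is where they split:
  printf("%d\n", -1 / 2);          // 0  (truncation)
  printf("%d\n", div_rtn(-1, 2));  // -1 (floor)
  return 0;
}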

aten/src/THCUNN/generic/pooling_shape.h

Lines changed: 5 additions & 1 deletion
@@ -1,11 +1,15 @@
 #ifndef THCUNN_POOLING_SHAPE_H
 #define THCUNN_POOLING_SHAPE_H
 
+#include <ATen/div_rtn.h>
+
 template<typename T>
 __host__ __forceinline__
 static T pooling_output_shape(
     T inputSize, T kernelSize, T pad, T stride, T dilation, bool ceil_mode) {
-    T outputSize = ((inputSize + 2 * pad - dilation * (kernelSize - 1) - 1 + (ceil_mode ? stride - 1 : 0)) / stride + 1);
+    T outputSize = div_rtn<T>(
+        inputSize + 2 * pad - dilation * (kernelSize - 1) - 1 +
+        (ceil_mode ? stride - 1 : 0), stride) + 1;
     if (pad) {
         // ensure that the last pooling starts inside the image
         // needed to avoid problems in ceil mode

aten/src/THNN/generic/pooling_shape.h

Lines changed: 5 additions & 2 deletions
@@ -1,13 +1,16 @@
 #ifndef THNN_POOLING_SHAPE_H
 #define THNN_POOLING_SHAPE_H
 
+#include <ATen/div_rtn.h>
+
 template<typename T>
 static inline T pooling_output_shape_pad_lr(
     T inputSize, T kernelSize, T pad_l, T pad_r, T stride, T dilation,
     bool ceil_mode
 ) {
-    T outputSize = ((inputSize + pad_l + pad_r - dilation * (kernelSize - 1)
-        - 1 + (ceil_mode ? stride - 1 : 0)) / stride + 1);
+    T outputSize = div_rtn<T>(
+        inputSize + pad_l + pad_r - dilation * (kernelSize - 1) - 1 +
+        (ceil_mode ? stride - 1 : 0), stride) + 1;
     if (pad_l) {
         // ensure that the last pooling starts inside the image
         // needed to avoid problems in ceil mode
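Worked through with numbers: the numerator inputSize + pad_l + pad_r - dilation * (kernelSize - 1) - 1 goes negative exactly when the dilated kernel extent exceeds the padded input. For inputSize = 4, kernelSize = 3, dilation = 2, no padding, stride = 2 (the configuration the new test below uses), it evaluates to 4 - 2 * 2 - 1 = -1. Truncating division then yields -1 / 2 = 0 and outputSize = 1, a bogus positive size that sails past the shape checks; floor division yields div_rtn(-1, 2) = -1 and outputSize = 0, which the callers reject with the RuntimeError the new test expects.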

test/test_nn.py

Lines changed: 25 additions & 0 deletions
@@ -3902,6 +3902,31 @@ def test_pool_large_size_cuda(self, dtype=torch.float):
     def test_pool_large_size(self, dtype=torch.float):
         self._test_pool_large_size(self, device="cpu")
 
+    @staticmethod
+    def _test_pool_invalid_size(self, device, dtype=torch.float):
+        for op in ('max', 'avg'):
+            for num_dim in [1, 2, 3]:
+                fn_name = '{}_pool{}d'.format(op, num_dim)
+                fn = getattr(F, fn_name)
+                # use a configuration that gives zero outputs only
+                # when doing a correct floor division by the stride
+                x = torch.ones([1, 1] + num_dim * [4],
+                               device=device, dtype=dtype)
+                with self.assertRaisesRegex(RuntimeError, r"too small|smaller than"):
+                    try:
+                        res = fn(x, 3, stride=2, padding=0, dilation=2)
+                    except TypeError:
+                        # some implementations do not support dilation
+                        res = fn(x, 6, stride=2, padding=0)
+
+    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    @repeat_test_for_types(ALL_TENSORTYPES)
+    def test_pool_invalid_size_cuda(self, dtype=torch.float):
+        self._test_pool_invalid_size(self, device="cuda", dtype=dtype)
+
+    def test_pool_invalid_size(self, dtype=torch.float):
+        self._test_pool_invalid_size(self, device="cpu")
+
     def _test_scatter(self, tensor):
         x = tensor.detach().requires_grad_()
         result = dp.scatter(x, (0, 1))
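Two details of the test worth noting: the regex r"too small|smaller than" presumably covers the differing wordings of the size-check errors across backends, and the TypeError fallback handles pooling functions that take no dilation parameter (e.g. avg_pool2d); for those, kernel size 6 on a size-4 input gives a numerator of 4 - 5 - 1 = -2 and an output size of 0 under either division, so they still exercise the invalid-size path.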
