
Commit 5d77709

ssnl authored and ezyang committed
Linearly interpolating upsampling fix (#5927)
* Changes in bilinear upsampling

* Add align_corners option to the upsampling module & functional when using linearly interpolating modes.

  When align_corners=True, the old upsampling scheme is used: it gives visually better results but does not properly align input and output pixels, so the output varies depending on where the content sits in the input. This PR adds the align_corners option and changes the default behavior to align_corners=False, with a warning when the option is not specified to nn.Upsample or nn.functional.upsample, so users are aware of the change. Adds tests in test_nn.py for spatial invariance when align_corners=False, plus the usual module tests for align_corners=False.

* Remove redundant checks and unnecessary variables; fix the cast

* Fix negative indices
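For reference, a minimal usage sketch of the option described above (illustrative only, not part of this diff; tensor shapes are made up):

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 3, 8, 8)

    # New default: pixel centers are aligned, so the interpolation no longer
    # depends on where the content sits in the input. Passing align_corners
    # explicitly also avoids the warning mentioned above.
    y = F.upsample(x, scale_factor=2, mode='bilinear', align_corners=False)

    # Old behaviour: the corner pixels of input and output are aligned.
    y_old = F.upsample(x, scale_factor=2, mode='bilinear', align_corners=True)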
1 parent 2f8d658 commit 5d77709

21 files changed: +423 −186 lines

aten/src/ATen/nn.yaml

Lines changed: 3 additions & 3 deletions
@@ -206,17 +206,17 @@
 # Note: The upsampling backwards functions also include an IntList input_size
 # parameter, which is added by nn_parse.py
 
-- name: upsample_linear1d(Tensor self, IntList[1] output_size)
+- name: upsample_linear1d(Tensor self, IntList[1] output_size, bool align_corners)
   cname: TemporalUpSamplingLinear
   scalar_check:
     grad_input: 'false'
 
-- name: upsample_bilinear2d(Tensor self, IntList[2] output_size)
+- name: upsample_bilinear2d(Tensor self, IntList[2] output_size, bool align_corners)
   cname: SpatialUpSamplingBilinear
   scalar_check:
     grad_input: 'false'
 
-- name: upsample_trilinear3d(Tensor self, IntList[3] output_size)
+- name: upsample_trilinear3d(Tensor self, IntList[3] output_size, bool align_corners)
   cname: VolumetricUpSamplingTrilinear
   scalar_check:
     grad_input: 'false'

aten/src/ATen/nn_parse.py

Lines changed: 10 additions & 6 deletions
@@ -286,19 +286,23 @@ def backward_declaration(base, thnn_functions):
                  if arg['name'] != 'inplace']
     arguments += base['buffers']
 
+    if 'upsample' in base['name']:
+        # Add input_size as parameter to upsample backwards functions
+        # Note that input_size is 4-dim for upsample_xxx2d
+        size = 2 + int(re.search(r'(\d+)d', base['name']).group(1))
+        input_size_arg = {'type': 'IntList', 'name': 'input_size', 'size': size}
+        for output_size_idx, arg in enumerate(arguments):
+            if arg['name'] == 'output_size':
+                break
+        arguments.insert(output_size_idx + 1, input_size_arg)
+
     # outputs from the forward may be inputs to the backwards
     for arg in arguments:
         if 'output' in arg:
             del arg['output']
 
     arguments += unique_args([output_arguments(f) for f in thnn_functions])
 
-    if 'upsample' in base['name']:
-        # Add input_size as parameter to upsample backwards functions
-        # Note that input_size is 4-dim for upsample_xxx2d
-        size = 2 + int(re.search(r'(\d+)d', base['name']).group(1))
-        arguments.append({'type': 'IntList', 'name': 'input_size', 'size': size})
-
     def initialize_output_arg(arg):
         # the mask array<bool, N> specifies which return values to compute
         arg['mask'] = True
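Schematically, the effect of inserting input_size right after output_size instead of appending it at the end (a toy sketch with the argument dicts reduced to names; the real argument list may differ):

    arguments = ['grad_output', 'self', 'output_size', 'align_corners']

    # Old behaviour (append): input_size would land after align_corners.
    # New behaviour (insert): input_size sits directly after output_size.
    for output_size_idx, name in enumerate(arguments):
        if name == 'output_size':
            break
    arguments.insert(output_size_idx + 1, 'input_size')
    assert arguments == ['grad_output', 'self', 'output_size', 'input_size', 'align_corners']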

aten/src/ATen/test/basic.cpp

Lines changed: 1 addition & 1 deletion
@@ -232,7 +232,7 @@ static void test(Type & type) {
   for (int64_t i = 0; i < tensor.numel(); ++i) {
     REQUIRE(tensor[i].equal(one * i));
   }
-  for (size_t i = 0; i < tensor.numel(); ++i) {
+  for (size_t i = 0; i < static_cast<uint64_t>(tensor.numel()); ++i) {
     REQUIRE(tensor[i].equal(one * static_cast<int64_t>(i)));
   }
   for (int i = 0; i < tensor.numel(); ++i) {

aten/src/THCUNN/SpatialUpSamplingBilinear.cu

Lines changed: 7 additions & 6 deletions
@@ -2,6 +2,7 @@
 // Originally developed by George Papandreou
 #include "THCUNN.h"
 #include "common.h"
+#include "linear_upsampling.h"
 #include "THCDeviceTensor.cuh"
 #include "THCDeviceTensorUtils.cuh"
 #include "THCDeviceUtils.cuh"
@@ -11,7 +12,7 @@
 
 template<typename Dtype, typename Acctype>
 __global__ void caffe_gpu_interp2_kernel(const int n,
-    const Acctype rheight, const Acctype rwidth,
+    const Acctype rheight, const Acctype rwidth, const bool align_corners,
     const THCDeviceTensor<Dtype, 4> data1, THCDeviceTensor<Dtype, 4> data2) {
   int index = threadIdx.x + blockIdx.x * blockDim.x;
   const int batchsize = data1.getSize(0);
@@ -37,13 +38,13 @@ __global__ void caffe_gpu_interp2_kernel(const int n,
       return;
     }
     //
-    const Acctype h1r = rheight * h2;
+    const Acctype h1r = linear_upsampling_compute_source_index<Acctype>(rheight, h2, align_corners);
     const int h1 = h1r;
     const int h1p = (h1 < height1 - 1) ? 1 : 0;
     const Acctype h1lambda = h1r - h1;
     const Acctype h0lambda = Acctype(1) - h1lambda;
     //
-    const Acctype w1r = rwidth * w2;
+    const Acctype w1r = linear_upsampling_compute_source_index<Acctype>(rwidth, w2, align_corners);
     const int w1 = w1r;
     const int w1p = (w1 < width1 - 1) ? 1 : 0;
     const Acctype w1lambda = w1r - w1;
@@ -64,7 +65,7 @@ __global__ void caffe_gpu_interp2_kernel(const int n,
 // Backward (adjoint) operation 1 <- 2 (accumulates)
 template <typename Dtype, typename Acctype>
 __global__ void caffe_gpu_interp2_kernel_backward(const int n,
-    const Acctype rheight, const Acctype rwidth,
+    const Acctype rheight, const Acctype rwidth, const bool align_corners,
     THCDeviceTensor<Dtype, 4> data1, const THCDeviceTensor<Dtype, 4> data2){
   int index = threadIdx.x + blockIdx.x * blockDim.x;
   const int batchsize = data1.getSize(0);
@@ -89,13 +90,13 @@ __global__ void caffe_gpu_interp2_kernel_backward(const int n,
      return;
    }
    //
-    const Acctype h1r = rheight * h2;
+    const Acctype h1r = linear_upsampling_compute_source_index<Acctype>(rheight, h2, align_corners);
    const int h1 = h1r;
    const int h1p = (h1 < height1 - 1) ? 1 : 0;
    const Acctype h1lambda = h1r - h1;
    const Acctype h0lambda = Acctype(1) - h1lambda;
    //
-    const Acctype w1r = rwidth * w2;
+    const Acctype w1r = linear_upsampling_compute_source_index<Acctype>(rwidth, w2, align_corners);
    const int w1 = w1r;
    const int w1p = (w1 < width1 - 1) ? 1 : 0;
    const Acctype w1lambda = w1r - w1;
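The kernels above call two helpers from linear_upsampling.h that this diff does not show. A rough Python sketch of what they presumably compute, inferred from the align_corners semantics described in the commit message (the actual header may differ):

    def compute_scale(input_size, output_size, align_corners):
        # align_corners=True stretches so the corner pixels coincide;
        # align_corners=False uses the plain size ratio.
        if output_size > 1:
            if align_corners:
                return (input_size - 1) / (output_size - 1)
            return input_size / output_size
        return 0.0

    def compute_source_index(scale, dst_index, align_corners):
        if align_corners:
            return scale * dst_index              # old scheme: rheight * h2, rwidth * w2
        src = scale * (dst_index + 0.5) - 0.5     # align pixel centers instead of corners
        return max(src, 0.0)                      # clamp, per "fix negative indices"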

aten/src/THCUNN/TemporalUpSamplingLinear.cu

Lines changed: 5 additions & 4 deletions
@@ -2,6 +2,7 @@
 // Originally developed by George Papandreou
 #include "THCUNN.h"
 #include "common.h"
+#include "linear_upsampling.h"
 #include "THCDeviceTensor.cuh"
 #include "THCDeviceTensorUtils.cuh"
 #include "THCDeviceUtils.cuh"
@@ -11,7 +12,7 @@
 
 template<typename Dtype, typename Acctype>
 __global__ void caffe_gpu_interp2_kernel(const int n,
-    const Acctype rwidth,
+    const Acctype rwidth, const bool align_corners,
     const THCDeviceTensor<Dtype, 3> data1, THCDeviceTensor<Dtype, 3> data2) {
   int index = threadIdx.x + blockIdx.x * blockDim.x;
   const int batchsize = data1.getSize(0);
@@ -33,7 +34,7 @@ __global__ void caffe_gpu_interp2_kernel(const int n,
      return;
    }
    //
-    const Acctype w1r = rwidth * w2;
+    const Acctype w1r = linear_upsampling_compute_source_index<Acctype>(rwidth, w2, align_corners);
    const int w1 = w1r;
    const int w1p = (w1 < width1 - 1) ? 1 : 0;
    const Acctype w1lambda = w1r - w1;
@@ -52,7 +53,7 @@ __global__ void caffe_gpu_interp2_kernel(const int n,
 // Backward (adjoint) operation 1 <- 2 (accumulates)
 template <typename Dtype, typename Acctype>
 __global__ void caffe_gpu_interp2_kernel_backward(const int n,
-    const Acctype rwidth,
+    const Acctype rwidth, const bool align_corners,
     THCDeviceTensor<Dtype, 3> data1, const THCDeviceTensor<Dtype, 3> data2){
   int index = threadIdx.x + blockIdx.x * blockDim.x;
   const int batchsize = data1.getSize(0);
@@ -73,7 +74,7 @@ __global__ void caffe_gpu_interp2_kernel_backward(const int n,
      return;
    }
    //
-    const Acctype w1r = rwidth * w2;
+    const Acctype w1r = linear_upsampling_compute_source_index<Acctype>(rwidth, w2, align_corners);
    const int w1 = w1r;
    const int w1p = (w1 < width1 - 1) ? 1 : 0;
    const Acctype w1lambda = w1r - w1;

aten/src/THCUNN/VolumetricUpSamplingTrilinear.cu

Lines changed: 11 additions & 10 deletions
@@ -2,6 +2,7 @@
 // Originally developed by George Papandreou
 #include "THCUNN.h"
 #include "common.h"
+#include "linear_upsampling.h"
 #include "THCDeviceTensor.cuh"
 #include "THCDeviceTensorUtils.cuh"
 #include "THCDeviceUtils.cuh"
@@ -12,7 +13,7 @@
 template<typename Dtype, typename Acctype>
 __launch_bounds__(1024)
 __global__ void caffe_gpu_interp2_kernel(const int n,
-    const Acctype rdepth, const Acctype rheight, const Acctype rwidth,
+    const Acctype rdepth, const Acctype rheight, const Acctype rwidth, const bool align_corners,
     const THCDeviceTensor<Dtype, 5> data1, THCDeviceTensor<Dtype, 5> data2) {
   int index = threadIdx.x + blockIdx.x * blockDim.x;
   const int batchsize = data1.getSize(0);
@@ -42,31 +43,31 @@ __global__ void caffe_gpu_interp2_kernel(const int n,
      return;
    }
    //
-    const Acctype t1r = rdepth * t2;
+    const Acctype t1r = linear_upsampling_compute_source_index<Acctype>(rdepth, t2, align_corners);
    const int t1 = t1r;
    const int t1p = (t1 < depth1 - 1) ? 1 : 0;
    const Acctype t1lambda = t1r - t1;
    const Acctype t0lambda = Acctype(1) - t1lambda;
    //
-    const Acctype h1r = rheight * h2;
+    const Acctype h1r = linear_upsampling_compute_source_index<Acctype>(rheight, h2, align_corners);
    const int h1 = h1r;
    const int h1p = (h1 < height1 - 1) ? 1 : 0;
    const Acctype h1lambda = h1r - h1;
    const Acctype h0lambda = Acctype(1) - h1lambda;
    //
-    const Acctype w1r = rwidth * w2;
+    const Acctype w1r = linear_upsampling_compute_source_index<Acctype>(rwidth, w2, align_corners);
    const int w1 = w1r;
    const int w1p = (w1 < width1 - 1) ? 1 : 0;
    const Acctype w1lambda = w1r - w1;
    const Acctype w0lambda = Acctype(1) - w1lambda;
    //
    for (int n = 0; n < batchsize ; n++){
      for (int c = 0; c < channels; ++c) {
-        const Acctype val = t0lambda * (h0lambda * (w0lambda * data1[n][c][t1][h1][w1] 
+        const Acctype val = t0lambda * (h0lambda * (w0lambda * data1[n][c][t1][h1][w1]
                                         + w1lambda * data1[n][c][t1][h1][w1+w1p])
                             + h1lambda * (w0lambda * data1[n][c][t1][h1+h1p][w1]
                                           + w1lambda * data1[n][c][t1][h1+h1p][w1+w1p]))
-            + t1lambda * (h0lambda * (w0lambda * data1[n][c][t1+t1p][h1][w1] 
+            + t1lambda * (h0lambda * (w0lambda * data1[n][c][t1+t1p][h1][w1]
                                       + w1lambda * data1[n][c][t1+t1p][h1][w1+w1p])
                           + h1lambda * (w0lambda * data1[n][c][t1+t1p][h1+h1p][w1]
                                         + w1lambda * data1[n][c][t1+t1p][h1+h1p][w1+w1p]));
@@ -80,7 +81,7 @@ __global__ void caffe_gpu_interp2_kernel(const int n,
 template <typename Dtype, typename Acctype>
 __launch_bounds__(1024)
 __global__ void caffe_gpu_interp2_kernel_backward(const int n,
-    const Acctype rdepth, const Acctype rheight, const Acctype rwidth,
+    const Acctype rdepth, const Acctype rheight, const Acctype rwidth, const bool align_corners,
     THCDeviceTensor<Dtype, 5> data1, const THCDeviceTensor<Dtype, 5> data2){
   int index = threadIdx.x + blockIdx.x * blockDim.x;
   const int batchsize = data1.getSize(0);
@@ -109,19 +110,19 @@ __global__ void caffe_gpu_interp2_kernel_backward(const int n,
      return;
    }
    //
-    const Acctype t1r = rdepth * t2;
+    const Acctype t1r = linear_upsampling_compute_source_index<Acctype>(rdepth, t2, align_corners);
    const int t1 = t1r;
    const int t1p = (t1 < depth1 - 1) ? 1 : 0;
    const Acctype t1lambda = t1r - t1;
    const Acctype t0lambda = Acctype(1) - t1lambda;
    //
-    const Acctype h1r = rheight * h2;
+    const Acctype h1r = linear_upsampling_compute_source_index<Acctype>(rheight, h2, align_corners);
    const int h1 = h1r;
    const int h1p = (h1 < height1 - 1) ? 1 : 0;
    const Acctype h1lambda = h1r - h1;
    const Acctype h0lambda = Acctype(1) - h1lambda;
    //
-    const Acctype w1r = rwidth * w2;
+    const Acctype w1r = linear_upsampling_compute_source_index<Acctype>(rwidth, w2, align_corners);
    const int w1 = w1r;
    const int w1p = (w1 < width1 - 1) ? 1 : 0;
    const Acctype w1lambda = w1r - w1;

aten/src/THCUNN/generic/SpatialUpSamplingBilinear.cu

Lines changed: 13 additions & 14 deletions
@@ -2,6 +2,8 @@
 #define THC_GENERIC_FILE "generic/SpatialUpSamplingBilinear.cu"
 #else
 
+#include "../linear_upsampling.h"
+
 static inline void THNN_(SpatialUpSamplingBilinear_shapeCheck)
                         (THCState *state,
                          THCTensor *input, THCTensor *gradOutput,
@@ -31,7 +33,8 @@ void THNN_(SpatialUpSamplingBilinear_updateOutput)(
            THCTensor *input,
           THCTensor *output,
           int outputHeight,
-           int outputWidth)
+           int outputWidth,
+           bool align_corners)
 {
   int nbatch = THCTensor_(size)(state, input, 0);
   int channels = THCTensor_(size)(state, input, 1);
@@ -52,14 +55,14 @@ void THNN_(SpatialUpSamplingBilinear_updateOutput)(
   THCDeviceTensor<real, 4> idata = toDeviceTensor<real, 4>(state, input);
   THCDeviceTensor<real, 4> odata = toDeviceTensor<real, 4>(state, output);
   THAssert(inputHeight > 0 && inputWidth > 0 && outputHeight > 0 && outputWidth > 0);
-  const accreal rheight= (outputHeight > 1) ? (accreal)(inputHeight - 1)/(outputHeight - 1) : accreal(0);
-  const accreal rwidth = (outputWidth > 1) ? (accreal)(inputWidth - 1)/(outputWidth - 1) : accreal(0);
+  const accreal rheight = linear_upsampling_compute_scale<accreal>(inputHeight, outputHeight, align_corners);
+  const accreal rwidth = linear_upsampling_compute_scale<accreal>(inputWidth, outputWidth, align_corners);
   const int num_kernels = outputHeight * outputWidth;
   const int num_threads =
     THCState_getCurrentDeviceProperties(state)->maxThreadsPerBlock;
   cudaStream_t stream = THCState_getCurrentStream(state);
   caffe_gpu_interp2_kernel<real, accreal> <<<THCCeilDiv(num_kernels, num_threads), num_threads ,
-   0 , stream>>>(num_kernels, rheight, rwidth, idata, odata);
+   0 , stream>>>(num_kernels, rheight, rwidth, align_corners, idata, odata);
   THCudaCheck(cudaGetLastError());
   THCTensor_(free)(state, input);
 }
@@ -74,7 +77,8 @@ void THNN_(SpatialUpSamplingBilinear_updateGradInput)(
           int inputHeight,
           int inputWidth,
           int outputHeight,
-           int outputWidth)
+           int outputWidth,
+           bool align_corners)
 {
   THNN_(SpatialUpSamplingBilinear_shapeCheck)
     (state, NULL, gradOutput,
@@ -88,19 +92,14 @@ void THNN_(SpatialUpSamplingBilinear_updateGradInput)(
   THCTensor_(zero)(state, gradInput);
   THCDeviceTensor<real, 4> data1 = toDeviceTensor<real, 4>(state, gradInput);
   THCDeviceTensor<real, 4> data2 = toDeviceTensor<real, 4>(state, gradOutput);
-  int height1 = data1.getSize(2);
-  int width1 = data1.getSize(3);
-  int height2 = data2.getSize(2);
-  int width2 = data2.getSize(3);
-  assert(height1 > 0 && width1 > 0 && height2 > 0 && width2 > 0);
-  const accreal rheight= (height2 > 1) ? (accreal)(height1 - 1)/(height2 - 1) : accreal(0);
-  const accreal rwidth = (width2 > 1) ? (accreal)(width1 - 1) / (width2 - 1) : accreal(0);
-  const int num_kernels = height2 * width2;
+  const accreal rheight = linear_upsampling_compute_scale<accreal>(inputHeight, outputHeight, align_corners);
+  const accreal rwidth = linear_upsampling_compute_scale<accreal>(inputWidth, outputWidth, align_corners);
+  const int num_kernels = outputHeight * outputWidth;
   const int num_threads =
     THCState_getCurrentDeviceProperties(state)->maxThreadsPerBlock;
   cudaStream_t stream = THCState_getCurrentStream(state);
   caffe_gpu_interp2_kernel_backward<real ,accreal> <<<THCCeilDiv(num_kernels, num_threads),
-  num_threads, 0, stream>>>(num_kernels, rheight, rwidth, data1, data2);
+  num_threads, 0, stream>>>(num_kernels, rheight, rwidth, align_corners, data1, data2);
   THCudaCheck(cudaGetLastError());
   THCTensor_(free)(state, gradInput);
   THCTensor_(free)(state, gradOutput);
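Putting the pieces together, a minimal pure-Python reference (a sketch only, reusing compute_scale / compute_source_index from the earlier sketch) of the h1/h1p/lambda sampling scheme the forward kernel implements:

    def upsample_bilinear2d(idata, out_h, out_w, align_corners=False):
        """idata: nested list [C][H][W] of floats; returns [C][out_h][out_w]."""
        channels, in_h, in_w = len(idata), len(idata[0]), len(idata[0][0])
        rheight = compute_scale(in_h, out_h, align_corners)
        rwidth = compute_scale(in_w, out_w, align_corners)
        odata = [[[0.0] * out_w for _ in range(out_h)] for _ in range(channels)]
        for h2 in range(out_h):
            h1r = compute_source_index(rheight, h2, align_corners)
            h1 = int(h1r)
            h1p = 1 if h1 < in_h - 1 else 0   # step to the next row, clamped at the border
            h1lambda = h1r - h1
            h0lambda = 1.0 - h1lambda
            for w2 in range(out_w):
                w1r = compute_source_index(rwidth, w2, align_corners)
                w1 = int(w1r)
                w1p = 1 if w1 < in_w - 1 else 0
                w1lambda = w1r - w1
                w0lambda = 1.0 - w1lambda
                for c in range(channels):
                    # Weighted average of the four neighbouring input pixels.
                    odata[c][h2][w2] = (
                        h0lambda * (w0lambda * idata[c][h1][w1]
                                    + w1lambda * idata[c][h1][w1 + w1p])
                        + h1lambda * (w0lambda * idata[c][h1 + h1p][w1]
                                      + w1lambda * idata[c][h1 + h1p][w1 + w1p]))
        return odata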

aten/src/THCUNN/generic/THCUNN.h

Lines changed: 12 additions & 6 deletions
@@ -1042,7 +1042,8 @@ TH_API void THNN_(SpatialUpSamplingBilinear_updateOutput)(
                   THCTensor *input,
                   THCTensor *output,
                   int outputHeight,
-                  int outputWidth);
+                  int outputWidth,
+                  bool align_corners);
 
 TH_API void THNN_(SpatialUpSamplingBilinear_updateGradInput)(
                   THCState *state,
@@ -1053,7 +1054,8 @@ TH_API void THNN_(SpatialUpSamplingBilinear_updateGradInput)(
                   int inputHeight,
                   int inputWidth,
                   int outputHeight,
-                  int outputWidth);
+                  int outputWidth,
+                  bool align_corners);
 
 TH_API void THNN_(SpatialUpSamplingNearest_updateGradInput)(
                   THCState *state,
@@ -1336,7 +1338,8 @@ TH_API void THNN_(TemporalUpSamplingLinear_updateOutput)(
                   THCState *state,
                   THCTensor *input,
                   THCTensor *output,
-                  int outputWidth);
+                  int outputWidth,
+                  bool align_corners);
 
 TH_API void THNN_(TemporalUpSamplingLinear_updateGradInput)(
                   THCState *state,
@@ -1345,7 +1348,8 @@ TH_API void THNN_(TemporalUpSamplingLinear_updateGradInput)(
                   int nbatch,
                   int nchannels,
                   int inputWidth,
-                  int outputWidth);
+                  int outputWidth,
+                  bool align_corners);
 
 TH_API void THNN_(TemporalUpSamplingNearest_updateGradInput)(
                   THCState *state,
@@ -1701,7 +1705,8 @@ TH_API void THNN_(VolumetricUpSamplingTrilinear_updateOutput)(
                   THCTensor *output,
                   int outputDepth,
                   int outputHeight,
-                  int outputWidth);
+                  int outputWidth,
+                  bool align_corners);
 
 TH_API void THNN_(VolumetricUpSamplingTrilinear_updateGradInput)(
                   THCState *state,
@@ -1714,6 +1719,7 @@ TH_API void THNN_(VolumetricUpSamplingTrilinear_updateGradInput)(
                   int inputWidth,
                   int outputDepth,
                   int outputHeight,
-                  int outputWidth);
+                  int outputWidth,
+                  bool align_corners);
 
 #endif
