Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions aten/src/ATen/nn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -206,17 +206,17 @@
# Note: The upsampling backward functions also take an IntList input_size
# parameter, which nn_parse.py inserts immediately after output_size.

- name: upsample_linear1d(Tensor self, IntList[1] output_size)
- name: upsample_linear1d(Tensor self, IntList[1] output_size, bool align_corners)
cname: TemporalUpSamplingLinear
scalar_check:
grad_input: 'false'

- name: upsample_bilinear2d(Tensor self, IntList[2] output_size)
- name: upsample_bilinear2d(Tensor self, IntList[2] output_size, bool align_corners)
cname: SpatialUpSamplingBilinear
scalar_check:
grad_input: 'false'

- name: upsample_trilinear3d(Tensor self, IntList[3] output_size)
- name: upsample_trilinear3d(Tensor self, IntList[3] output_size, bool align_corners)
cname: VolumetricUpSamplingTrilinear
scalar_check:
grad_input: 'false'
Expand Down
16 changes: 10 additions & 6 deletions aten/src/ATen/nn_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,19 +286,23 @@ def backward_declaration(base, thnn_functions):
if arg['name'] != 'inplace']
arguments += base['buffers']

if 'upsample' in base['name']:

This comment was marked as off-topic.

This comment was marked as off-topic.

# Insert input_size into the upsample backward arguments, right after output_size.
# input_size has N + 2 entries for upsample_xxxNd (e.g. 4 for upsample_xxx2d).
size = 2 + int(re.search(r'(\d+)d', base['name']).group(1))
input_size_arg = {'type': 'IntList', 'name': 'input_size', 'size': size}
for output_size_idx, arg in enumerate(arguments):
if arg['name'] == 'output_size':
break
arguments.insert(output_size_idx + 1, input_size_arg)

# outputs from the forward may be inputs to the backwards
for arg in arguments:
if 'output' in arg:
del arg['output']

arguments += unique_args([output_arguments(f) for f in thnn_functions])

if 'upsample' in base['name']:
# Add input_size as parameter to upsample backwards functions
# Note that input_size is 4-dim for upsample_xxx2d
size = 2 + int(re.search(r'(\d+)d', base['name']).group(1))
arguments.append({'type': 'IntList', 'name': 'input_size', 'size': size})

def initialize_output_arg(arg):
# the mask array<bool, N> specifies which return values to compute
arg['mask'] = True
Expand Down
2 changes: 1 addition & 1 deletion aten/src/ATen/test/basic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ static void test(Type & type) {
for (int64_t i = 0; i < tensor.numel(); ++i) {
REQUIRE(tensor[i].equal(one * i));
}
for (size_t i = 0; i < tensor.numel(); ++i) {
for (size_t i = 0; i < static_cast<uint64_t>(tensor.numel()); ++i) {
REQUIRE(tensor[i].equal(one * static_cast<int64_t>(i)));
}
for (int i = 0; i < tensor.numel(); ++i) {
Expand Down
13 changes: 7 additions & 6 deletions aten/src/THCUNN/SpatialUpSamplingBilinear.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Originally developed by George Papandreou
#include "THCUNN.h"
#include "common.h"
#include "linear_upsampling.h"
#include "THCDeviceTensor.cuh"
#include "THCDeviceTensorUtils.cuh"
#include "THCDeviceUtils.cuh"
Expand All @@ -11,7 +12,7 @@

template<typename Dtype, typename Acctype>
__global__ void caffe_gpu_interp2_kernel(const int n,
const Acctype rheight, const Acctype rwidth,
const Acctype rheight, const Acctype rwidth, const bool align_corners,
const THCDeviceTensor<Dtype, 4> data1, THCDeviceTensor<Dtype, 4> data2) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
const int batchsize = data1.getSize(0);
Expand All @@ -37,13 +38,13 @@ __global__ void caffe_gpu_interp2_kernel(const int n,
return;
}
//
const Acctype h1r = rheight * h2;
const Acctype h1r = linear_upsampling_compute_source_index<Acctype>(rheight, h2, align_corners);
const int h1 = h1r;
const int h1p = (h1 < height1 - 1) ? 1 : 0;
const Acctype h1lambda = h1r - h1;
const Acctype h0lambda = Acctype(1) - h1lambda;
//
const Acctype w1r = rwidth * w2;
const Acctype w1r = linear_upsampling_compute_source_index<Acctype>(rwidth, w2, align_corners);
const int w1 = w1r;
const int w1p = (w1 < width1 - 1) ? 1 : 0;
const Acctype w1lambda = w1r - w1;
Expand All @@ -64,7 +65,7 @@ __global__ void caffe_gpu_interp2_kernel(const int n,
// Backward (adjoint) operation: data1 (gradInput) <- data2 (gradOutput); accumulates into data1
template <typename Dtype, typename Acctype>
__global__ void caffe_gpu_interp2_kernel_backward(const int n,
const Acctype rheight, const Acctype rwidth,
const Acctype rheight, const Acctype rwidth, const bool align_corners,
THCDeviceTensor<Dtype, 4> data1, const THCDeviceTensor<Dtype, 4> data2){
int index = threadIdx.x + blockIdx.x * blockDim.x;
const int batchsize = data1.getSize(0);
Expand All @@ -89,13 +90,13 @@ __global__ void caffe_gpu_interp2_kernel_backward(const int n,
return;
}
//
const Acctype h1r = rheight * h2;
const Acctype h1r = linear_upsampling_compute_source_index<Acctype>(rheight, h2, align_corners);
const int h1 = h1r;
const int h1p = (h1 < height1 - 1) ? 1 : 0;
const Acctype h1lambda = h1r - h1;
const Acctype h0lambda = Acctype(1) - h1lambda;
//
const Acctype w1r = rwidth * w2;
const Acctype w1r = linear_upsampling_compute_source_index<Acctype>(rwidth, w2, align_corners);
const int w1 = w1r;
const int w1p = (w1 < width1 - 1) ? 1 : 0;
const Acctype w1lambda = w1r - w1;
Expand Down
9 changes: 5 additions & 4 deletions aten/src/THCUNN/TemporalUpSamplingLinear.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Originally developed by George Papandreou
#include "THCUNN.h"
#include "common.h"
#include "linear_upsampling.h"
#include "THCDeviceTensor.cuh"
#include "THCDeviceTensorUtils.cuh"
#include "THCDeviceUtils.cuh"
Expand All @@ -11,7 +12,7 @@

template<typename Dtype, typename Acctype>
__global__ void caffe_gpu_interp2_kernel(const int n,
const Acctype rwidth,
const Acctype rwidth, const bool align_corners,
const THCDeviceTensor<Dtype, 3> data1, THCDeviceTensor<Dtype, 3> data2) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
const int batchsize = data1.getSize(0);
Expand All @@ -33,7 +34,7 @@ __global__ void caffe_gpu_interp2_kernel(const int n,
return;
}
//
const Acctype w1r = rwidth * w2;
const Acctype w1r = linear_upsampling_compute_source_index<Acctype>(rwidth, w2, align_corners);
const int w1 = w1r;
const int w1p = (w1 < width1 - 1) ? 1 : 0;
const Acctype w1lambda = w1r - w1;
Expand All @@ -52,7 +53,7 @@ __global__ void caffe_gpu_interp2_kernel(const int n,
// Backward (adjoint) operation 1 <- 2 (accumulates)
template <typename Dtype, typename Acctype>
__global__ void caffe_gpu_interp2_kernel_backward(const int n,
const Acctype rwidth,
const Acctype rwidth, const bool align_corners,
THCDeviceTensor<Dtype, 3> data1, const THCDeviceTensor<Dtype, 3> data2){
int index = threadIdx.x + blockIdx.x * blockDim.x;
const int batchsize = data1.getSize(0);
Expand All @@ -73,7 +74,7 @@ __global__ void caffe_gpu_interp2_kernel_backward(const int n,
return;
}
//
const Acctype w1r = rwidth * w2;
const Acctype w1r = linear_upsampling_compute_source_index<Acctype>(rwidth, w2, align_corners);
const int w1 = w1r;
const int w1p = (w1 < width1 - 1) ? 1 : 0;
const Acctype w1lambda = w1r - w1;
Expand Down
21 changes: 11 additions & 10 deletions aten/src/THCUNN/VolumetricUpSamplingTrilinear.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Originally developed by George Papandreou
#include "THCUNN.h"
#include "common.h"
#include "linear_upsampling.h"
#include "THCDeviceTensor.cuh"
#include "THCDeviceTensorUtils.cuh"
#include "THCDeviceUtils.cuh"
Expand All @@ -12,7 +13,7 @@
template<typename Dtype, typename Acctype>
__launch_bounds__(1024)
__global__ void caffe_gpu_interp2_kernel(const int n,
const Acctype rdepth, const Acctype rheight, const Acctype rwidth,
const Acctype rdepth, const Acctype rheight, const Acctype rwidth, const bool align_corners,
const THCDeviceTensor<Dtype, 5> data1, THCDeviceTensor<Dtype, 5> data2) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
const int batchsize = data1.getSize(0);
Expand Down Expand Up @@ -42,31 +43,31 @@ __global__ void caffe_gpu_interp2_kernel(const int n,
return;
}
//
const Acctype t1r = rdepth * t2;
const Acctype t1r = linear_upsampling_compute_source_index<Acctype>(rdepth, t2, align_corners);
const int t1 = t1r;
const int t1p = (t1 < depth1 - 1) ? 1 : 0;
const Acctype t1lambda = t1r - t1;
const Acctype t0lambda = Acctype(1) - t1lambda;
//
const Acctype h1r = rheight * h2;
const Acctype h1r = linear_upsampling_compute_source_index<Acctype>(rheight, h2, align_corners);
const int h1 = h1r;
const int h1p = (h1 < height1 - 1) ? 1 : 0;
const Acctype h1lambda = h1r - h1;
const Acctype h0lambda = Acctype(1) - h1lambda;
//
const Acctype w1r = rwidth * w2;
const Acctype w1r = linear_upsampling_compute_source_index<Acctype>(rwidth, w2, align_corners);
const int w1 = w1r;
const int w1p = (w1 < width1 - 1) ? 1 : 0;
const Acctype w1lambda = w1r - w1;
const Acctype w0lambda = Acctype(1) - w1lambda;
//
for (int n = 0; n < batchsize ; n++){
for (int c = 0; c < channels; ++c) {
const Acctype val = t0lambda * (h0lambda * (w0lambda * data1[n][c][t1][h1][w1]
const Acctype val = t0lambda * (h0lambda * (w0lambda * data1[n][c][t1][h1][w1]
+ w1lambda * data1[n][c][t1][h1][w1+w1p])
+ h1lambda * (w0lambda * data1[n][c][t1][h1+h1p][w1]
+ w1lambda * data1[n][c][t1][h1+h1p][w1+w1p]))
+ t1lambda * (h0lambda * (w0lambda * data1[n][c][t1+t1p][h1][w1]
+ t1lambda * (h0lambda * (w0lambda * data1[n][c][t1+t1p][h1][w1]
+ w1lambda * data1[n][c][t1+t1p][h1][w1+w1p])
+ h1lambda * (w0lambda * data1[n][c][t1+t1p][h1+h1p][w1]
+ w1lambda * data1[n][c][t1+t1p][h1+h1p][w1+w1p]));
Expand All @@ -80,7 +81,7 @@ __global__ void caffe_gpu_interp2_kernel(const int n,
template <typename Dtype, typename Acctype>
__launch_bounds__(1024)
__global__ void caffe_gpu_interp2_kernel_backward(const int n,
const Acctype rdepth, const Acctype rheight, const Acctype rwidth,
const Acctype rdepth, const Acctype rheight, const Acctype rwidth, const bool align_corners,
THCDeviceTensor<Dtype, 5> data1, const THCDeviceTensor<Dtype, 5> data2){
int index = threadIdx.x + blockIdx.x * blockDim.x;
const int batchsize = data1.getSize(0);
Expand Down Expand Up @@ -109,19 +110,19 @@ __global__ void caffe_gpu_interp2_kernel_backward(const int n,
return;
}
//
const Acctype t1r = rdepth * t2;
const Acctype t1r = linear_upsampling_compute_source_index<Acctype>(rdepth, t2, align_corners);
const int t1 = t1r;
const int t1p = (t1 < depth1 - 1) ? 1 : 0;
const Acctype t1lambda = t1r - t1;
const Acctype t0lambda = Acctype(1) - t1lambda;
//
const Acctype h1r = rheight * h2;
const Acctype h1r = linear_upsampling_compute_source_index<Acctype>(rheight, h2, align_corners);
const int h1 = h1r;
const int h1p = (h1 < height1 - 1) ? 1 : 0;
const Acctype h1lambda = h1r - h1;
const Acctype h0lambda = Acctype(1) - h1lambda;
//
const Acctype w1r = rwidth * w2;
const Acctype w1r = linear_upsampling_compute_source_index<Acctype>(rwidth, w2, align_corners);
const int w1 = w1r;
const int w1p = (w1 < width1 - 1) ? 1 : 0;
const Acctype w1lambda = w1r - w1;
Expand Down
27 changes: 13 additions & 14 deletions aten/src/THCUNN/generic/SpatialUpSamplingBilinear.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#define THC_GENERIC_FILE "generic/SpatialUpSamplingBilinear.cu"
#else

#include "../linear_upsampling.h"

static inline void THNN_(SpatialUpSamplingBilinear_shapeCheck)
(THCState *state,
THCTensor *input, THCTensor *gradOutput,
Expand Down Expand Up @@ -31,7 +33,8 @@ void THNN_(SpatialUpSamplingBilinear_updateOutput)(
THCTensor *input,
THCTensor *output,
int outputHeight,
int outputWidth)
int outputWidth,
bool align_corners)
{
int nbatch = THCTensor_(size)(state, input, 0);
int channels = THCTensor_(size)(state, input, 1);
Expand All @@ -52,14 +55,14 @@ void THNN_(SpatialUpSamplingBilinear_updateOutput)(
THCDeviceTensor<real, 4> idata = toDeviceTensor<real, 4>(state, input);
THCDeviceTensor<real, 4> odata = toDeviceTensor<real, 4>(state, output);
THAssert(inputHeight > 0 && inputWidth > 0 && outputHeight > 0 && outputWidth > 0);
const accreal rheight= (outputHeight > 1) ? (accreal)(inputHeight - 1)/(outputHeight - 1) : accreal(0);
const accreal rwidth = (outputWidth > 1) ? (accreal)(inputWidth - 1)/(outputWidth - 1) : accreal(0);
const accreal rheight = linear_upsampling_compute_scale<accreal>(inputHeight, outputHeight, align_corners);
const accreal rwidth = linear_upsampling_compute_scale<accreal>(inputWidth, outputWidth, align_corners);
const int num_kernels = outputHeight * outputWidth;
const int num_threads =
THCState_getCurrentDeviceProperties(state)->maxThreadsPerBlock;
cudaStream_t stream = THCState_getCurrentStream(state);
caffe_gpu_interp2_kernel<real, accreal> <<<THCCeilDiv(num_kernels, num_threads), num_threads ,
0 , stream>>>(num_kernels, rheight, rwidth, idata, odata);
0 , stream>>>(num_kernels, rheight, rwidth, align_corners, idata, odata);
THCudaCheck(cudaGetLastError());
THCTensor_(free)(state, input);
}
Expand All @@ -74,7 +77,8 @@ void THNN_(SpatialUpSamplingBilinear_updateGradInput)(
int inputHeight,
int inputWidth,
int outputHeight,
int outputWidth)
int outputWidth,
bool align_corners)
{
THNN_(SpatialUpSamplingBilinear_shapeCheck)
(state, NULL, gradOutput,
Expand All @@ -88,19 +92,14 @@ void THNN_(SpatialUpSamplingBilinear_updateGradInput)(
THCTensor_(zero)(state, gradInput);
THCDeviceTensor<real, 4> data1 = toDeviceTensor<real, 4>(state, gradInput);
THCDeviceTensor<real, 4> data2 = toDeviceTensor<real, 4>(state, gradOutput);
int height1 = data1.getSize(2);
int width1 = data1.getSize(3);
int height2 = data2.getSize(2);
int width2 = data2.getSize(3);
assert(height1 > 0 && width1 > 0 && height2 > 0 && width2 > 0);
const accreal rheight= (height2 > 1) ? (accreal)(height1 - 1)/(height2 - 1) : accreal(0);
const accreal rwidth = (width2 > 1) ? (accreal)(width1 - 1) / (width2 - 1) : accreal(0);
const int num_kernels = height2 * width2;
const accreal rheight = linear_upsampling_compute_scale<accreal>(inputHeight, outputHeight, align_corners);
const accreal rwidth = linear_upsampling_compute_scale<accreal>(inputWidth, outputWidth, align_corners);
const int num_kernels = outputHeight * outputWidth;
const int num_threads =
THCState_getCurrentDeviceProperties(state)->maxThreadsPerBlock;
cudaStream_t stream = THCState_getCurrentStream(state);
caffe_gpu_interp2_kernel_backward<real ,accreal> <<<THCCeilDiv(num_kernels, num_threads),
num_threads, 0, stream>>>(num_kernels, rheight, rwidth, data1, data2);
num_threads, 0, stream>>>(num_kernels, rheight, rwidth, align_corners, data1, data2);
THCudaCheck(cudaGetLastError());
THCTensor_(free)(state, gradInput);
THCTensor_(free)(state, gradOutput);
Expand Down
18 changes: 12 additions & 6 deletions aten/src/THCUNN/generic/THCUNN.h
Original file line number Diff line number Diff line change
Expand Up @@ -1042,7 +1042,8 @@ TH_API void THNN_(SpatialUpSamplingBilinear_updateOutput)(
THCTensor *input,
THCTensor *output,
int outputHeight,
int outputWidth);
int outputWidth,
bool align_corners);

TH_API void THNN_(SpatialUpSamplingBilinear_updateGradInput)(
THCState *state,
Expand All @@ -1053,7 +1054,8 @@ TH_API void THNN_(SpatialUpSamplingBilinear_updateGradInput)(
int inputHeight,
int inputWidth,
int outputHeight,
int outputWidth);
int outputWidth,
bool align_corners);

TH_API void THNN_(SpatialUpSamplingNearest_updateGradInput)(
THCState *state,
Expand Down Expand Up @@ -1336,7 +1338,8 @@ TH_API void THNN_(TemporalUpSamplingLinear_updateOutput)(
THCState *state,
THCTensor *input,
THCTensor *output,
int outputWidth);
int outputWidth,
bool align_corners);

TH_API void THNN_(TemporalUpSamplingLinear_updateGradInput)(
THCState *state,
Expand All @@ -1345,7 +1348,8 @@ TH_API void THNN_(TemporalUpSamplingLinear_updateGradInput)(
int nbatch,
int nchannels,
int inputWidth,
int outputWidth);
int outputWidth,
bool align_corners);

TH_API void THNN_(TemporalUpSamplingNearest_updateGradInput)(
THCState *state,
Expand Down Expand Up @@ -1701,7 +1705,8 @@ TH_API void THNN_(VolumetricUpSamplingTrilinear_updateOutput)(
THCTensor *output,
int outputDepth,
int outputHeight,
int outputWidth);
int outputWidth,
bool align_corners);

TH_API void THNN_(VolumetricUpSamplingTrilinear_updateGradInput)(
THCState *state,
Expand All @@ -1714,6 +1719,7 @@ TH_API void THNN_(VolumetricUpSamplingTrilinear_updateGradInput)(
int inputWidth,
int outputDepth,
int outputHeight,
int outputWidth);
int outputWidth,
bool align_corners);

#endif
Loading