7 changes: 7 additions & 0 deletions aten/src/ATen/cudnn/Descriptors.cpp
@@ -94,6 +94,13 @@ void FilterDescriptor::set(const at::Tensor &t, int64_t pad) {
     throw std::runtime_error("cuDNN supports only up to " STR(CUDNN_DIM_MAX) " dimensions");
 #undef _STR
 #undef STR
+  if (!t.is_contiguous()) {
+    // NB: It is possible for this test to be insufficient, because the
+    // Tensor passed in to set the filter descriptor may not be the actual
+    // Tensor whose data pointer is passed to cuDNN. Nevertheless,
+    // that is the common case, so we can catch most client errors with this test.
+    throw std::runtime_error("cuDNN filters (a.k.a. weights) must be contiguous");
+  }
   int size[CUDNN_DIM_MAX];
   for (int i = 0; i < dim; ++i) {
     size[i] = (int) t.size(i);
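For context on why the new check matters: broadcasting views such as `expand()` share storage without materializing it, so their strides do not describe the dense layout cuDNN expects. A minimal sketch of the contiguity semantics involved (plain PyTorch tensor API, no CUDA needed):

```python
import torch

# expand() returns a broadcast view: no data is copied, the broadcast
# dimension gets stride 0, and the result is therefore not contiguous.
w = torch.ones(1).expand(1, 1, 2)
print(w.is_contiguous())               # False -- would trip the new descriptor check

# contiguous() materializes the view into a dense buffer that cuDNN can consume.
print(w.contiguous().is_contiguous())  # True
```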
15 changes: 15 additions & 0 deletions aten/src/ATen/native/Convolution.cpp
@@ -35,6 +35,21 @@ struct ConvParams {
   bool is_depthwise(const at::Tensor& input, const at::Tensor& weight) const;
 };
 
+std::ostream& operator<<(std::ostream & out, const ConvParams& params) {
+  out << "ConvParams {"
+      << " stride = " << IntList{params.stride}
+      << " padding = " << IntList{params.padding}
+      << " dilation = " << IntList{params.dilation}
+      << " transposed = " << params.transposed
+      << " output_padding = " << IntList{params.output_padding}
+      << " groups = " << params.groups
+      << " benchmark = " << params.benchmark
+      << " deterministic = " << params.deterministic
+      << " cudnn_enabled = " << params.cudnn_enabled
+      << "}";
+  return out;
+}
+
 auto ConvParams::is_strided() const -> bool {
   bool is_strided = false;
   for (int s : stride) {
14 changes: 10 additions & 4 deletions aten/src/ATen/native/cudnn/Conv.cpp
@@ -863,17 +863,20 @@ Tensor cudnn_convolution_forward(
   TensorArg output{ output_t, "result", 0 };
   convolution_shape_check(c, input, weight, output, padding, stride, dilation, groups);
 
+  // See #4500
+  Tensor weight_contig = weight->contiguous();
+
 #if CUDNN_VERSION < 7000
   for (int i = 0; i < groups; i++) {
     raw_cudnn_convolution_forward_out(
       narrowGroup(*output, output_channels_dim, i, groups),
       narrowGroup(*input, input_channels_dim, i, groups),
-      narrowGroup(*weight, weight_output_channels_dim, i, groups),
+      narrowGroup(weight_contig, weight_output_channels_dim, i, groups),
       padding, stride, dilation, 1, benchmark, deterministic);
   }
 #else
   raw_cudnn_convolution_forward_out(
-    *output, *input, *weight,
+    *output, *input, weight_contig,
     padding, stride, dilation, groups, benchmark, deterministic);
 #endif
 
@@ -996,17 +999,20 @@ Tensor cudnn_convolution_backward_input(
   TensorArg grad_input{ grad_input_t, "result", 0 };
   convolution_shape_check(c, grad_input, weight, grad_output, padding, stride, dilation, groups);
 
+  // See #4500
+  Tensor weight_contig = weight->contiguous();
+
 #if CUDNN_VERSION < 7000
   for (int i = 0; i < groups; i++) {
     raw_cudnn_convolution_backward_input_out(
       narrowGroup(*grad_input, input_channels_dim, i, groups),
       narrowGroup(*grad_output, output_channels_dim, i, groups),
-      narrowGroup(*weight, weight_output_channels_dim, i, groups),
+      narrowGroup(weight_contig, weight_output_channels_dim, i, groups),
       padding, stride, dilation, 1, benchmark, deterministic);
   }
 #else
   raw_cudnn_convolution_backward_input_out(
-    *grad_input, *grad_output, *weight,
+    *grad_input, *grad_output, weight_contig,
     padding, stride, dilation, groups, benchmark, deterministic);
 #endif
 
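The user-visible contract these `weight_contig` copies establish is that a convolution called with a non-contiguous weight produces the same result as one called with an explicitly contiguous copy; the new `test_cudnn_noncontiguous_weight` test below exercises exactly this. A rough sketch of that contract, written against the current PyTorch API and assuming a CUDA build with cuDNN:

```python
import torch
import torch.nn.functional as F

x = torch.ones(1, 1, 3, dtype=torch.double, device='cuda')
# A broadcast (non-contiguous) weight, as in the test below.
w = torch.ones(1, dtype=torch.double, device='cuda').expand(1, 1, 2)

out_view = F.conv1d(x, w, bias=None, stride=2, dilation=2)
out_copy = F.conv1d(x, w.contiguous(), bias=None, stride=2, dilation=2)
assert torch.equal(out_view, out_copy)
```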
3 changes: 3 additions & 0 deletions test/common_nn.py
@@ -849,6 +849,9 @@ def test_cuda(self, test_case):
                 gpu_gradOutput,
                 create_graph=True)
 
+            for cpu_d_i, gpu_d_i in zip(cpu_gradInputs, gpu_gradInputs):
+                test_case.assertEqual(cpu_d_i, gpu_d_i, 2e-4)
+
             # We mix output into the second backwards computation so that
             # torch.autograd.grad doesn't complain that some inputs
             # are unreachable (which can happen if you differentiate
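The added loop compares the second-order (grad-of-grad) inputs computed on CPU and on GPU, a check that previously had to be skipped for several cuDNN-backed modules via `FIXME_no_cuda_gradgrad_comparison`. A minimal, self-contained sketch of what such a comparison does, using the current autograd API; the `gradgrad_inputs` helper, the shapes, and the tolerance are illustrative only and not part of the test harness (assumes a CUDA build):

```python
import torch
import torch.nn.functional as F

def gradgrad_inputs(x_cpu, w_cpu, device):
    # Same data on each device, with gradients tracked on fresh leaf tensors.
    x = x_cpu.to(device).detach().requires_grad_()
    w = w_cpu.to(device).detach().requires_grad_()
    out = F.conv1d(x, w, dilation=2)
    grad_out = torch.ones_like(out)
    # First backward, keeping the graph so it can be differentiated again.
    gx, gw = torch.autograd.grad(out, (x, w), grad_out, create_graph=True)
    # Second backward: differentiate the first-order gradients themselves.
    return torch.autograd.grad(gx.sum() + gw.sum(), (x, w))

x_cpu = torch.randn(1, 3, 6, dtype=torch.double)
w_cpu = torch.randn(4, 3, 2, dtype=torch.double)
for cpu_d_i, gpu_d_i in zip(gradgrad_inputs(x_cpu, w_cpu, 'cpu'),
                            gradgrad_inputs(x_cpu, w_cpu, 'cuda')):
    assert torch.allclose(cpu_d_i, gpu_d_i.cpu(), atol=2e-4)
```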
15 changes: 10 additions & 5 deletions test/test_nn.py
@@ -3687,6 +3687,16 @@ def test_conv_double_backward_stride(self):
                                        batch_size, inp_size, dilation,
                                        no_weight)
 
+    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    def test_cudnn_noncontiguous_weight(self):
+        # Noncontiguous weights must be contiguous() before being
+        # passed to cuDNN
+        input = Variable(torch.cuda.DoubleTensor([1, 1, 1]).view(1, 1, 3))
+        weights1 = Variable(torch.cuda.DoubleTensor([1]).expand(1, 1, 2))
+        weights2 = Variable(torch.cuda.DoubleTensor([1]).expand(1, 1, 2)).contiguous()
+        self.assertEqual(F.conv1d(input, weights1, bias=None, stride=2, dilation=2),
+                         F.conv1d(input, weights2, bias=None, stride=2, dilation=2))
+
     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
     def test_conv_double_backward_cuda(self):
         batch_size = 1
@@ -4613,7 +4623,6 @@ def smoothl1loss_no_reduce_test():
         input_size=(1, 3, 6),
         cudnn=True,
         desc='dilated',
-        FIXME_no_cuda_gradgrad_comparison=True, # See #4500
     ),
     dict(
         fullname='ConvTranspose1d_groups',
@@ -4689,7 +4698,6 @@ def smoothl1loss_no_reduce_test():
         input_size=(1, 3, 6, 7),
         cudnn=True,
         desc='dilated',
-        FIXME_no_cuda_gradgrad_comparison=True, # See #4500
     ),
     dict(
         module_name='ConvTranspose2d',
@@ -4703,7 +4711,6 @@ def smoothl1loss_no_reduce_test():
         constructor=lambda: nn.ConvTranspose2d(2, 4, (2, 3), groups=2),
         input_size=(1, 2, 4, 5),
         cudnn=True,
-        FIXME_no_cuda_gradgrad_comparison=True, # See #4500
     ),
     dict(
         fullname='Conv2d_depthwise',
@@ -4885,15 +4892,13 @@ def smoothl1loss_no_reduce_test():
         constructor_args=(2, 3, (2, 3, 2)),
         cudnn=True,
         input_size=(1, 2, 4, 5, 4),
-        FIXME_no_cuda_gradgrad_comparison=True, # See #4500
     ),
     dict(
         module_name='ConvTranspose3d',
         constructor_args=(2, 3, (2, 3, 2), 1, 0, 0, 1, True, (2, 2, 2)),
         cudnn=True,
         input_size=(1, 2, 4, 5, 4),
         desc='dilated',
-        FIXME_no_cuda_gradgrad_comparison=True, # See #4500
     ),
     dict(
         module_name='MaxPool3d',