7 changes: 7 additions & 0 deletions aten/src/ATen/cudnn/Descriptors.cpp
@@ -94,6 +94,13 @@ void FilterDescriptor::set(const at::Tensor &t, int64_t pad) {
     throw std::runtime_error("cuDNN supports only up to " STR(CUDNN_DIM_MAX) " dimensions");
 #undef _STR
 #undef STR
+  if (!t.is_contiguous()) {
+    // NB: It is possible for this test to be insufficient, because the
+    // Tensor passed in to set the filter descriptor may not be the actual
+    // Tensor whose data pointer is passed to cuDNN. Nevertheless,
+    // that is the common case, so we can catch most client errors with this test.
+    throw std::runtime_error("cuDNN filters (a.k.a. weights) must be contiguous");
+  }
   int size[CUDNN_DIM_MAX];
   for (int i = 0; i < dim; ++i) {
     size[i] = (int) t.size(i);
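For context on why the new check matters: broadcasting views such as `expand()` share storage without materializing it, so their strides do not describe the dense layout cuDNN expects. A minimal sketch of the contiguity semantics involved (plain PyTorch tensor API, no CUDA needed):

```python
import torch

# expand() returns a broadcast view: no data is copied, the broadcast
# dimension gets stride 0, and the result is therefore not contiguous.
w = torch.ones(1).expand(1, 1, 2)
print(w.is_contiguous())               # False -- would trip the new descriptor check

# contiguous() materializes the view into a dense buffer that cuDNN can consume.
print(w.contiguous().is_contiguous())  # True
```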
15 changes: 15 additions & 0 deletions aten/src/ATen/native/Convolution.cpp
@@ -35,6 +35,21 @@ struct ConvParams {
   bool is_depthwise(const at::Tensor& input, const at::Tensor& weight) const;
 };
 
+std::ostream& operator<<(std::ostream & out, const ConvParams& params) {
+  out << "ConvParams {"
+      << " stride = " << IntList{params.stride}
+      << " padding = " << IntList{params.padding}
+      << " dilation = " << IntList{params.dilation}
+      << " transposed = " << params.transposed
+      << " output_padding = " << IntList{params.output_padding}
+      << " groups = " << params.groups
+      << " benchmark = " << params.benchmark
+      << " deterministic = " << params.deterministic
+      << " cudnn_enabled = " << params.cudnn_enabled
+      << "}";
+  return out;
+}
+
 auto ConvParams::is_strided() const -> bool {
   bool is_strided = false;
   for (int s : stride) {
14 changes: 10 additions & 4 deletions aten/src/ATen/native/cudnn/Conv.cpp
@@ -863,17 +863,20 @@ Tensor cudnn_convolution_forward(
   TensorArg output{ output_t, "result", 0 };
   convolution_shape_check(c, input, weight, output, padding, stride, dilation, groups);
 
+  // See #4500
+  Tensor weight_contig = weight->contiguous();
+
 #if CUDNN_VERSION < 7000
   for (int i = 0; i < groups; i++) {
     raw_cudnn_convolution_forward_out(
       narrowGroup(*output, output_channels_dim, i, groups),
       narrowGroup(*input, input_channels_dim, i, groups),
-      narrowGroup(*weight, weight_output_channels_dim, i, groups),
+      narrowGroup(weight_contig, weight_output_channels_dim, i, groups),
       padding, stride, dilation, 1, benchmark, deterministic);
   }
 #else
   raw_cudnn_convolution_forward_out(
-    *output, *input, *weight,
+    *output, *input, weight_contig,
     padding, stride, dilation, groups, benchmark, deterministic);
 #endif
 
@@ -996,17 +999,20 @@ Tensor cudnn_convolution_backward_input(
   TensorArg grad_input{ grad_input_t, "result", 0 };
   convolution_shape_check(c, grad_input, weight, grad_output, padding, stride, dilation, groups);
 
+  // See #4500
+  Tensor weight_contig = weight->contiguous();
+
 #if CUDNN_VERSION < 7000
   for (int i = 0; i < groups; i++) {
     raw_cudnn_convolution_backward_input_out(
       narrowGroup(*grad_input, input_channels_dim, i, groups),
       narrowGroup(*grad_output, output_channels_dim, i, groups),
-      narrowGroup(*weight, weight_output_channels_dim, i, groups),
+      narrowGroup(weight_contig, weight_output_channels_dim, i, groups),
       padding, stride, dilation, 1, benchmark, deterministic);
   }
 #else
   raw_cudnn_convolution_backward_input_out(
-    *grad_input, *grad_output, *weight,
+    *grad_input, *grad_output, weight_contig,
     padding, stride, dilation, groups, benchmark, deterministic);
 #endif
 
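The user-visible contract these `weight_contig` copies establish is that a convolution called with a non-contiguous weight produces the same result as one called with an explicitly contiguous copy; the new `test_cudnn_noncontiguous_weight` test below exercises exactly this. A rough sketch of that contract, written against the current PyTorch API and assuming a CUDA build with cuDNN:

```python
import torch
import torch.nn.functional as F

x = torch.ones(1, 1, 3, dtype=torch.double, device='cuda')
# A broadcast (non-contiguous) weight, as in the test below.
w = torch.ones(1, dtype=torch.double, device='cuda').expand(1, 1, 2)

out_view = F.conv1d(x, w, bias=None, stride=2, dilation=2)
out_copy = F.conv1d(x, w.contiguous(), bias=None, stride=2, dilation=2)
assert torch.equal(out_view, out_copy)
```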
3 changes: 3 additions & 0 deletions test/common_nn.py
@@ -849,6 +849,9 @@ def test_cuda(self, test_case):
                 gpu_gradOutput,
                 create_graph=True)
 
+            for cpu_d_i, gpu_d_i in zip(cpu_gradInputs, gpu_gradInputs):
+                test_case.assertEqual(cpu_d_i, gpu_d_i, 2e-4)
+
             # We mix output into the second backwards computation so that
             # torch.autograd.grad doesn't complain that some inputs
             # are unreachable (which can happen if you differentiate
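The added loop compares the second-order (grad-of-grad) inputs computed on CPU and on GPU, a check that previously had to be skipped for several cuDNN-backed modules via `FIXME_no_cuda_gradgrad_comparison`. A minimal, self-contained sketch of what such a comparison does, using the current autograd API; the `gradgrad_inputs` helper, the shapes, and the tolerance are illustrative only and not part of the test harness (assumes a CUDA build):

```python
import torch
import torch.nn.functional as F

def gradgrad_inputs(x_cpu, w_cpu, device):
    # Same data on each device, with gradients tracked on fresh leaf tensors.
    x = x_cpu.to(device).detach().requires_grad_()
    w = w_cpu.to(device).detach().requires_grad_()
    out = F.conv1d(x, w, dilation=2)
    grad_out = torch.ones_like(out)
    # First backward, keeping the graph so it can be differentiated again.
    gx, gw = torch.autograd.grad(out, (x, w), grad_out, create_graph=True)
    # Second backward: differentiate the first-order gradients themselves.
    return torch.autograd.grad(gx.sum() + gw.sum(), (x, w))

x_cpu = torch.randn(1, 3, 6, dtype=torch.double)
w_cpu = torch.randn(4, 3, 2, dtype=torch.double)
for cpu_d_i, gpu_d_i in zip(gradgrad_inputs(x_cpu, w_cpu, 'cpu'),
                            gradgrad_inputs(x_cpu, w_cpu, 'cuda')):
    assert torch.allclose(cpu_d_i, gpu_d_i.cpu(), atol=2e-4)
```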
15 changes: 10 additions & 5 deletions test/test_nn.py
@@ -3687,6 +3687,16 @@ def test_conv_double_backward_stride(self):
                                        batch_size, inp_size, dilation,
                                        no_weight)
 
+    @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
+    def test_cudnn_noncontiguous_weight(self):
+        # Noncontiguous weights must be contiguous() before being
+        # passed to cuDNN
+        input = Variable(torch.cuda.DoubleTensor([1, 1, 1]).view(1, 1, 3))
+        weights1 = Variable(torch.cuda.DoubleTensor([1]).expand(1, 1, 2))
+        weights2 = Variable(torch.cuda.DoubleTensor([1]).expand(1, 1, 2)).contiguous()
+        self.assertEqual(F.conv1d(input, weights1, bias=None, stride=2, dilation=2),
+                         F.conv1d(input, weights2, bias=None, stride=2, dilation=2))
+
     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
     def test_conv_double_backward_cuda(self):
         batch_size = 1
@@ -4613,7 +4623,6 @@ def smoothl1loss_no_reduce_test():
         input_size=(1, 3, 6),
         cudnn=True,
         desc='dilated',
-        FIXME_no_cuda_gradgrad_comparison=True, # See #4500
     ),
     dict(
         fullname='ConvTranspose1d_groups',
@@ -4689,7 +4698,6 @@ def smoothl1loss_no_reduce_test():
         input_size=(1, 3, 6, 7),
         cudnn=True,
         desc='dilated',
-        FIXME_no_cuda_gradgrad_comparison=True, # See #4500
     ),
     dict(
         module_name='ConvTranspose2d',
@@ -4703,7 +4711,6 @@ def smoothl1loss_no_reduce_test():
         constructor=lambda: nn.ConvTranspose2d(2, 4, (2, 3), groups=2),
         input_size=(1, 2, 4, 5),
         cudnn=True,
-        FIXME_no_cuda_gradgrad_comparison=True, # See #4500
     ),
     dict(
         fullname='Conv2d_depthwise',
@@ -4885,15 +4892,13 @@ def smoothl1loss_no_reduce_test():
         constructor_args=(2, 3, (2, 3, 2)),
         cudnn=True,
         input_size=(1, 2, 4, 5, 4),
-        FIXME_no_cuda_gradgrad_comparison=True, # See #4500
     ),
     dict(
         module_name='ConvTranspose3d',
         constructor_args=(2, 3, (2, 3, 2), 1, 0, 0, 1, True, (2, 2, 2)),
         cudnn=True,
         input_size=(1, 2, 4, 5, 4),
         desc='dilated',
-        FIXME_no_cuda_gradgrad_comparison=True, # See #4500
     ),
     dict(
         module_name='MaxPool3d',