@@ -42,6 +42,15 @@ def iter_tensors(x, only_requiring_grad=False):
             for result in iter_tensors(elem, only_requiring_grad):
                 yield result

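+# Like iter_tensors, but yields only the complex tensors found in x; used in
+# get_numerical_jacobian below to detect whether any input is complex.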
+def complex_iter_tensors(x, only_requiring_grad=False):
+    if isinstance(x, torch.Tensor):
+        if (x.requires_grad or not only_requiring_grad) and x.is_complex():
+            yield x
+    elif isinstance(x, container_abcs.Iterable) and not isinstance(x, str):
+        for elem in x:
+            # Recurse with complex_iter_tensors (not iter_tensors), so nested
+            # containers also yield only complex tensors.
+            for result in complex_iter_tensors(elem, only_requiring_grad):
+                yield result
+
 def get_numerical_jacobian(fn, input, target=None, eps=1e-3):
     """
     input: input to `fn`
@@ -53,37 +62,44 @@ def get_numerical_jacobian(fn, input, target=None, eps=1e-3):
     if target is None:
         target = input
     output_size = fn(input).numel()
-    jacobian = make_jacobian(target, output_size)
-
+    real_jacobian = make_jacobian(target, output_size)
     # It's much easier to iterate over flattened lists of tensors.
     # These are references to the same objects in the jacobian, so any changes
     # will be reflected in it as well.
     x_tensors = iter_tensors(target, True)
-    j_tensors = iter_tensors(jacobian)
+    real_j_tensors = iter_tensors(real_jacobian)

-    def compute_gradient(x, idx, is_mkldnn=False):
+    imag_jacobian = make_jacobian(target, output_size)
+    # Iterate with plain iter_tensors so the zip below stays aligned with
+    # x_tensors and real_j_tensors even when inputs mix real and complex.
+    imag_j_tensors = iter_tensors(imag_jacobian)
+
+    # A generator object is always truthy, so probe complex_iter_tensors for
+    # a first element instead of comparing the generator to None.
+    contains_complex_input = next(complex_iter_tensors(target), None) is not None
+
+    def compute_gradient(x_tensor, x_idx, is_mkldnn=False, imag_delta=False):

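+        # For MKL-DNN inputs, compute_gradient receives a dense copy; fn_out
+        # converts the perturbed copy back to MKL-DNN before calling fn, since
+        # MKL-DNN tensors cannot be indexed directly (see the branch below).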
         def fn_out(is_mkldnn=False):
             if not is_mkldnn:
                 return fn(input).clone()
             else:
-                return fn([x.to_mkldnn()])
+                return fn([x_tensor.to_mkldnn()])

-        orig = x[idx].item()
-        x[idx] = orig - eps
+        # Perturb along the imaginary axis when imag_delta is set, which
+        # yields the derivative with respect to the imaginary part of x.
+        if imag_delta:
+            delta = eps * 1j
+        else:
+            delta = eps
+        orig = x_tensor[x_idx].item()
+        x_tensor[x_idx] = orig - delta
         outa = fn_out(is_mkldnn)
-        x[idx] = orig + eps
+        x_tensor[x_idx] = orig + delta
         outb = fn_out(is_mkldnn)
         if not is_mkldnn:
-            x[idx] = orig
+            x_tensor[x_idx] = orig
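+        # |delta| equals eps for both the real and the imaginary perturbation,
+        # so the central difference below is divided by 2 * eps in either case.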
         r = (outb - outa) / (2 * eps)
         return r.detach().reshape(-1)

     # TODO: compare structure
-    for x_tensor, d_tensor in zip(x_tensors, j_tensors):
-        is_complex = x_tensor.dtype.is_complex
-        if is_complex:
-            eps *= (1 + 1j)
+    for x_tensor, real_d_tensor, imag_d_tensor in zip(x_tensors, real_j_tensors, imag_j_tensors):
+        update_imag_d_tensor = contains_complex_input and x_tensor.dtype.is_complex
+
         if x_tensor.is_sparse:
             def get_stride(size):
                 dim = len(size)
@@ -108,7 +124,9 @@ def get_stride(size):
                 for x_idx in product(*[range(m) for m in x_values.size()[1:]]):
                     indices = x_indices[i].tolist() + list(x_idx)
                     d_idx = sum(indices[k] * x_stride[k] for k in range(len(x_size)))
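+                    # d_idx is the row-major flattened position of this sparse
+                    # entry in the dense input; it selects the Jacobian row to fill.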
-                    d_tensor[d_idx] = compute_gradient(x_value, x_idx)
+                    real_d_tensor[d_idx] = compute_gradient(x_value, x_idx)
+                    if update_imag_d_tensor:
+                        imag_d_tensor[d_idx] = compute_gradient(x_value, x_idx, imag_delta=True)
         elif x_tensor.layout == torch._mkldnn:
             # Use .data here to get around the version check
             x_tensor = x_tensor.data
@@ -119,14 +137,21 @@ def get_stride(size):
                 # this is really inefficient, but without indexing implemented, there's
                 # not really a better way than converting back and forth
                 x_tensor_dense = x_tensor.to_dense()
-                d_tensor[d_idx] = compute_gradient(x_tensor_dense, x_idx, is_mkldnn=True)
+                real_d_tensor[d_idx] = compute_gradient(x_tensor_dense, x_idx, is_mkldnn=True)
+                if update_imag_d_tensor:
+                    imag_d_tensor[d_idx] = compute_gradient(x_tensor_dense, x_idx, imag_delta=True, is_mkldnn=True)
         else:
             # Use .data here to get around the version check
             x_tensor = x_tensor.data
             for d_idx, x_idx in enumerate(product(*[range(m) for m in x_tensor.size()])):
-                d_tensor[d_idx] = compute_gradient(x_tensor, x_idx)
+                real_d_tensor[d_idx] = compute_gradient(x_tensor, x_idx)
+                if update_imag_d_tensor:
+                    imag_d_tensor[d_idx] = compute_gradient(x_tensor, x_idx, imag_delta=True)

-    return jacobian
+    # Return a pair so callers can tell purely real inputs (imaginary part is
+    # None) apart from complex ones.
+    if contains_complex_input:
+        return real_jacobian, imag_jacobian
+    else:
+        return real_jacobian, None


 def get_analytical_jacobian(input, output, nondet_tol=0.0):
@@ -285,7 +310,8 @@ def fail_test(msg):
     for i, o in enumerate(func_out):
         def fn(input):
             return _as_tuple(func(*input))[i]
-        numerical = get_numerical_jacobian(fn, tupled_inputs, eps=eps)
+        # TODO: update this to also include check for ds_dy
+        numerical = get_numerical_jacobian(fn, tupled_inputs, eps=eps)[0]
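+        # Only the real-part Jacobian is checked for the zero-gradient case;
+        # the imaginary part is dropped here (see the TODO above).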
         for n in numerical:
             if torch.ne(n, 0).sum() > 0:
                 return fail_test('Numerical gradient for function expected to be zero')
@@ -299,17 +325,30 @@ def fn(input):
             return _as_tuple(func(*input))[i]

         analytical, reentrant, correct_grad_sizes = get_analytical_jacobian(tupled_inputs, o, nondet_tol=nondet_tol)
-        numerical = get_numerical_jacobian(fn, tupled_inputs, eps=eps)
+        numerical_real, numerical_imag = get_numerical_jacobian(fn, tupled_inputs, eps=eps)

         if not correct_grad_sizes:
             return fail_test('Analytical gradient has incorrect size')

-        for j, (a, n) in enumerate(zip(analytical, numerical)):
-            if a.numel() != 0 or n.numel() != 0:
-                if not torch.allclose(a, n, rtol, atol):
-                    return fail_test('Jacobian mismatch for output %d with respect to input %d,\n'
-                                     'numerical:%s\nanalytical:%s\n' % (i, j, n, a))
-
+        if numerical_imag is None:
+            for j, (a, n) in enumerate(zip(analytical, numerical_real)):
+                if a.numel() != 0 or n.numel() != 0:
+                    if not torch.allclose(a, n, rtol, atol):
+                        return fail_test('Jacobian mismatch for output %d with respect to input %d,\n'
+                                         'numerical:%s\nanalytical:%s\n' % (i, j, n, a))
+        else:
+            for j, (a, n_re, n_im, inp) in enumerate(zip(analytical, numerical_real, numerical_imag, tupled_inputs)):
+                if a.numel() != 0 or n_re.numel() != 0:
+                    if inp.is_complex():
+                        # Wirtinger calculus: for z = x + iy and a real-valued
+                        # loss s,
+                        #   ds/dz      = 0.5 * (ds/dx - 1j * ds/dy)
+                        #   ds/dz_conj = 0.5 * (ds/dx + 1j * ds/dy)
+                        # and autograd reports (ds/dz).conj() + ds/dz_conj,
+                        # i.e. ds/dx + 1j * ds/dy. Example: for s = |z|^2 this
+                        # gives 2x + 2jy = 2z, matching autograd's gradient.
+                        ds_dz = 0.5 * (n_re - 1j * n_im)
+                        ds_dz_conj = 0.5 * (n_re + 1j * n_im)
+                        dL_dz_conj = ds_dz.conj() + ds_dz_conj
+                    else:
+                        dL_dz_conj = n_re
+                    if not torch.allclose(a, dL_dz_conj, rtol, atol):
+                        return fail_test('Jacobian mismatch for output %d with respect to input %d,\n'
+                                         'numerical:%s\nanalytical:%s\n' % (i, j, dL_dz_conj, a))
+
         if not reentrant:
             return fail_test('Backward is not reentrant, i.e., running backward with same '
                              'input and grad_output multiple times gives different values, '