
Commit eb314f9

zou3519 authored and pytorchmergebot committed
Add setup_context staticmethod to autograd.Function (#89859)
Adds a setup_context staticmethod to autograd.Function. If it exists, the user splits the ctx-specific logic out of forward() and puts it into the setup_context staticmethod. Docs will come later, when we remove the feature flag.

Test Plan:
- some light tests

Pull Request resolved: #89859
Approved by: https://github.com/soulitzer
1 parent 103be1f commit eb314f9
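
For reference, a minimal sketch of the split described in the commit message, modeled on the tests added below. MyExp is a hypothetical example class, not part of this commit, and the new calling convention only runs under the private feature flag the commit adds:

import torch
from torch.autograd import Function

class MyExp(Function):  # hypothetical example class
    @staticmethod
    def forward(x):
        # New-style forward: pure computation, no ctx argument.
        return x.exp()

    @staticmethod
    def setup_context(ctx, inputs, outputs):
        # ctx-specific logic (saving tensors, stashing attributes) moves here.
        ctx.save_for_backward(outputs)

    @staticmethod
    def backward(ctx, gO):
        result, = ctx.saved_tensors
        return gO * result  # d/dx exp(x) = exp(x)

with torch.autograd.function._set_autograd_function_extension_enabled(True):
    x = torch.randn(3, requires_grad=True)
    y = MyExp.apply(x)
    gx, = torch.autograd.grad(y.sum(), x)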

3 files changed: +174 −15 lines changed


test/test_autograd.py

Lines changed: 88 additions & 0 deletions
@@ -544,6 +544,94 @@ def fn(x):
         with self.assertRaisesRegex(RuntimeError, "expects an grad_fn"):
             torch._C._will_engine_execute_node(out)
 
+    def test_custom_function_setup_context_simple(self):
+        class MySquare(Function):
+            @staticmethod
+            def forward(x):
+                return x ** 2
+
+            @staticmethod
+            def setup_context(ctx, inputs, outputs):
+                x, = inputs
+                ctx.save_for_backward(x)
+
+            @staticmethod
+            def backward(ctx, gO):
+                x, = ctx.saved_tensors
+                return gO * 2 * x
+
+        with torch.autograd.function._set_autograd_function_extension_enabled(True):
+            x = torch.randn([], requires_grad=True)
+            y = MySquare.apply(x)
+            gx, = torch.autograd.grad(y, x)
+            self.assertEqual(gx, 2 * x)
+
+    def test_custom_function_setup_context_multi_output(self):
+        # Multiple outputs with some non-Tensor outputs.
+        class MySquare(Function):
+            @staticmethod
+            def forward(x):
+                two_x = x.item() * 2
+                return x ** 2, two_x
+
+            @staticmethod
+            def setup_context(ctx, inputs, outputs):
+                x, = inputs
+                _, two_x = outputs
+                ctx.two_x = two_x
+
+            @staticmethod
+            @once_differentiable
+            def backward(ctx, gO, _):
+                return gO * ctx.two_x
+
+        with torch.autograd.function._set_autograd_function_extension_enabled(True):
+            x = torch.randn([], requires_grad=True)
+            y, _ = MySquare.apply(x)
+            gx, = torch.autograd.grad(y, x)
+            self.assertEqual(gx, 2 * x)
+
+    def test_custom_function_setup_context_multi_input(self):
+        class MyReshape(Function):
+            @staticmethod
+            def forward(x, shape, scale_forward, scale_backward):
+                return x.reshape(shape) * scale_forward
+
+            @staticmethod
+            def setup_context(ctx, inputs, outputs):
+                x, shape, scale_forward, scale_backward = inputs
+                ctx.scale_backward = scale_backward
+                ctx.x_shape = x.shape
+
+            @staticmethod
+            def backward(ctx, gO):
+                return gO.reshape(ctx.x_shape) * ctx.scale_backward, None, None, None
+
+        class MyReshapeRef(Function):
+            @staticmethod
+            def forward(ctx, x, shape, scale_forward, scale_backward):
+                ctx.scale_backward = scale_backward
+                ctx.x_shape = x.shape
+                return x.reshape(shape) * scale_forward
+
+            @staticmethod
+            def backward(ctx, gO):
+                return gO.reshape(ctx.x_shape) * ctx.scale_backward, None, None, None
+
+        def test(x, shape, scale_forward, scale_backward):
+            y = MyReshape.apply(x, shape, scale_forward, scale_backward).sum()
+            gx, = torch.autograd.grad(y, x)
+
+            y_expected = MyReshapeRef.apply(x, shape, scale_forward, scale_backward).sum()
+            gx_expected, = torch.autograd.grad(y_expected, x)
+
+            self.assertEqual(y_expected, y)
+            self.assertEqual(gx_expected, gx)
+
+        with torch.autograd.function._set_autograd_function_extension_enabled(True):
+            test(torch.randn(24, requires_grad=True), (3, 8), 7, 11)
+            test(torch.randn(2, 3, 4, requires_grad=True), (6, 4), -1, 2)
+
     def test_accumulate_grad(self):
         grad_output = torch.ones(5, 5)

torch/autograd/function.py

Lines changed: 12 additions & 0 deletions
@@ -1,3 +1,4 @@
+import contextlib
 import torch
 import torch._C as _C
 from torch._C import _functions
@@ -468,6 +469,17 @@ def traceable(fn_cls):
     return fn_cls
 
 
+# Private feature flag. Not user-facing.
+@contextlib.contextmanager
+def _set_autograd_function_extension_enabled(enabled=True):
+    try:
+        prev_state = torch._C._is_autograd_function_extension_enabled()
+        torch._C._set_autograd_function_extension_enabled(enabled)
+        yield
+    finally:
+        torch._C._set_autograd_function_extension_enabled(prev_state)
+
+
 class InplaceFunction(Function):
 
     def __init__(self, inplace=False):
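
As the tests above show, callers opt in via this context manager. A small sketch of its save-and-restore behavior, assuming the flag defaults to off (the gated tests suggest it does):

import torch

assert not torch._C._is_autograd_function_extension_enabled()  # assumed default

with torch.autograd.function._set_autograd_function_extension_enabled(True):
    # Enabled inside the block.
    assert torch._C._is_autograd_function_extension_enabled()

# The try/finally restores the previous state on exit, even on error.
assert not torch._C._is_autograd_function_extension_enabled()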

torch/csrc/autograd/python_function.cpp

Lines changed: 74 additions & 15 deletions
@@ -821,6 +821,43 @@ PyObject* THPFunction_maybe_clear_saved_tensors(
   END_HANDLE_TH_ERRORS
 }
 
+namespace {
+
+THPObjectPtr make_ctx_input_tuple(
+    THPFunction* ctx,
+    const UnpackedInput& unpacked_input,
+    int64_t num_args) {
+  THPObjectPtr ctx_input_tuple(PyTuple_New(num_args + 1));
+  if (!ctx_input_tuple)
+    return {};
+  Py_INCREF(ctx);
+  PyTuple_SET_ITEM(ctx_input_tuple.get(), 0, (PyObject*)ctx);
+  for (const auto i : c10::irange(num_args)) {
+    PyObject* arg = PyTuple_GET_ITEM(unpacked_input.input_tuple.get(), i);
+    Py_INCREF(arg);
+    PyTuple_SET_ITEM(ctx_input_tuple.get(), i + 1, arg);
+  }
+  return ctx_input_tuple;
+}
+
+THPObjectPtr make_ctx_input_output_tuple(
+    THPFunction* ctx,
+    UnpackedInput& unpacked_input,
+    PyObject* outputs) {
+  THPObjectPtr result(PyTuple_New(3));
+  if (!result)
+    return {};
+  Py_INCREF(ctx);
+  Py_INCREF(unpacked_input.input_tuple.get());
+  Py_INCREF(outputs);
+  PyTuple_SET_ITEM(result.get(), 0, (PyObject*)ctx);
+  PyTuple_SET_ITEM(result.get(), 1, unpacked_input.input_tuple.get());
+  PyTuple_SET_ITEM(result.get(), 2, outputs);
+  return result;
+}
+
+} // namespace
+
 PyObject* THPFunction_apply(PyObject* cls, PyObject* inputs) {
   HANDLE_TH_ERRORS
 
@@ -865,29 +902,51 @@ PyObject* THPFunction_apply(PyObject* cls, PyObject* inputs) {
   ctx->needs_input_grad = input_info.needs_input_grad.release();
   ctx->is_variable_input = std::move(input_info.is_variable_input);
 
-  // Prepend ctx to input_tuple, in preparation for static method call
+  // autograd.Function may optionally contain a setup_context staticmethod.
+  // In this case, autograd.Function.forward does NOT accept a ctx object.
+  bool has_separate_setup_context_fn =
+      (isAutogradFunctionExtensionEnabled() &&
+       PyObject_HasAttrString(cls, "setup_context"));
+
   auto num_args = PyTuple_GET_SIZE(inputs);
-  THPObjectPtr ctx_input_tuple(PyTuple_New(num_args + 1));
-  if (!ctx_input_tuple)
-    return nullptr;
-  Py_INCREF(ctx);
-  PyTuple_SET_ITEM(ctx_input_tuple.get(), 0, (PyObject*)ctx);
-  for (const auto i : c10::irange(num_args)) {
-    PyObject* arg = PyTuple_GET_ITEM(unpacked_input.input_tuple.get(), i);
-    Py_INCREF(arg);
-    PyTuple_SET_ITEM(ctx_input_tuple.get(), i + 1, arg);
-  }
 
   // Call forward
-  THPObjectPtr tensor_outputs;
+  THPObjectPtr outputs;
   {
     AutoGradMode grad_mode(false);
     at::AutoFwGradMode fw_grad_mode(false);
    THPObjectPtr forward_fn(PyObject_GetAttrString(cls, "forward"));
     if (!forward_fn)
       return nullptr;
-    tensor_outputs = PyObject_CallObject(forward_fn, ctx_input_tuple);
-    if (!tensor_outputs)
+    if (has_separate_setup_context_fn) {
+      // call forward followed by setup_context
+      outputs = PyObject_CallObject(forward_fn, unpacked_input.input_tuple);
+      if (!outputs) {
+        return nullptr;
+      }
+      // signature is setup_context(ctx, inputs, outputs)
+      auto ctx_input_output_tuple =
+          make_ctx_input_output_tuple(ctx, unpacked_input, outputs);
+      if (!ctx_input_output_tuple) {
+        return nullptr;
+      }
+      THPObjectPtr setup_context_fn(
+          PyObject_GetAttrString(cls, "setup_context"));
+      auto result =
+          PyObject_CallObject(setup_context_fn, ctx_input_output_tuple);
+      if (!result) {
+        return nullptr;
+      }
+    } else {
+      // call forward
+      auto ctx_input_tuple =
+          make_ctx_input_tuple(ctx, unpacked_input, num_args);
+      if (!ctx_input_tuple) {
+        return nullptr;
+      }
+      outputs = PyObject_CallObject(forward_fn, ctx_input_tuple);
+    }
+    if (!outputs)
       return nullptr;
   }
 
@@ -897,7 +956,7 @@ PyObject* THPFunction_apply(PyObject* cls, PyObject* inputs) {
       ctx,
       unpacked_input,
       inputs,
-      std::move(tensor_outputs),
+      std::move(outputs),
       is_executable,
       node);
   END_HANDLE_TH_ERRORS
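
In rough Python pseudocode, the dispatch that THPFunction_apply now performs looks like the following; extension_enabled, make_ctx, and process_outputs are stand-in names for the surrounding C++ machinery, not real functions:

def apply(cls, *inputs):
    ctx = make_ctx()  # stand-in for the C++-side context object
    with torch.no_grad():  # mirrors AutoGradMode / AutoFwGradMode (false)
        if extension_enabled() and hasattr(cls, "setup_context"):
            # New convention: forward never sees ctx;
            # setup_context(ctx, inputs, outputs) runs right after it.
            outputs = cls.forward(*inputs)
            cls.setup_context(ctx, inputs, outputs)
        else:
            # Old convention: ctx is prepended to the forward arguments.
            outputs = cls.forward(ctx, *inputs)
    return process_outputs(ctx, inputs, outputs)  # stand-in for the epilogue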
