
Commit fb529c2

anijain2305 authored and pytorchmergebot committed
[dynamo] skip_guard_eval_unsafe stance for power users (#140251)
Pull Request resolved: #140251
Approved by: https://github.com/jansel
ghstack dependencies: #140223, #140250
1 parent 7392e88 commit fb529c2

12 files changed: +275 −11 lines
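The commit adds a `skip_guard_eval_unsafe` keyword to `torch.compiler.set_stance`, exercised throughout the new test file below. A minimal sketch of the intended warm-up-then-skip workflow (the function and inputs here are illustrative, not from the commit):

    import torch

    def fn(x, y):
        return x * y

    opt_fn = torch.compile(fn, backend="eager")

    # Warm-up phase: exercise every input variation expected at runtime so
    # that all needed compiled artifacts (and their guards) already exist.
    opt_fn(torch.randn(4), torch.randn(4))

    # Steady state: evaluate only the cheap differentiating guards. This is
    # unsafe if an unseen input would have required a recompile.
    with torch.compiler.set_stance(skip_guard_eval_unsafe=True):
        opt_fn(torch.randn(4), torch.randn(4))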
Lines changed: 146 additions & 0 deletions
@@ -0,0 +1,146 @@
# Owner(s): ["module: dynamo"]

import torch
import torch._dynamo.test_case
import torch._dynamo.testing


def my_custom_function(x):
    return x + 1


class RunDiffGuardTests(torch._dynamo.test_case.TestCase):
    def test_bool_recompile(self):
        def fn(x, y, c):
            if c:
                return x * y
            else:
                return x + y

        opt_fn = torch.compile(fn, backend="inductor")
        x = 2 * torch.ones(4)
        y = 3 * torch.ones(4)

        ref1 = opt_fn(x, y, True)
        ref2 = opt_fn(x, y, False)

        with torch.compiler.set_stance(skip_guard_eval_unsafe=True):
            res2 = opt_fn(x, y, False)
            res1 = opt_fn(x, y, True)

        self.assertEqual(ref1, res1)
        self.assertEqual(ref2, res2)

    def test_tensor_recompile(self):
        def fn(x, y):
            return x * y

        opt_fn = torch.compile(fn, backend="eager")
        x = torch.randn(4, dtype=torch.float32)
        y = torch.randn(4, dtype=torch.float32)

        ref1 = opt_fn(x, y)

        x64 = torch.randn(4, dtype=torch.float64)
        y64 = torch.randn(4, dtype=torch.float64)
        ref2 = opt_fn(x64, y64)

        with torch.compiler.set_stance(skip_guard_eval_unsafe=True):
            res1 = opt_fn(x, y)
            res2 = opt_fn(x64, y64)

        self.assertEqual(ref1, res1)
        self.assertEqual(ref2, res2)

    def test_post_recompile(self):
        class Foo:
            a = 4
            b = 5

        foo = Foo()

        def fn(x):
            return x + foo.a + foo.b

        cnts = torch._dynamo.testing.CompileCounter()
        opt_fn = torch.compile(fn, backend=cnts)

        x = torch.randn(4)
        ref = fn(x)
        res = opt_fn(x)
        self.assertEqual(ref, res)
        self.assertEqual(cnts.frame_count, 1)

        foo.a = 11
        ref = fn(x)
        res = opt_fn(x)
        self.assertEqual(ref, res)
        self.assertEqual(cnts.frame_count, 2)

        with torch.compiler.set_stance(skip_guard_eval_unsafe=True):
            # Set it back to original value
            foo.a = 4
            ref = fn(x)
            res = opt_fn(x)
            self.assertEqual(ref, res)

            foo.a = 11
            ref = fn(x)
            res = opt_fn(x)
            self.assertEqual(ref, res)

        # Check that we are back to original behavior
        foo.b = 8
        ref = fn(x)
        res = opt_fn(x)
        self.assertEqual(ref, res)
        self.assertEqual(cnts.frame_count, 3)

    def test_fail_on_tensor_shape_change(self):
        def fn(dt):
            return dt["x"] + 1

        x = torch.randn(4)
        dt = {}
        dt["x"] = x
        opt_fn = torch.compile(fn, backend="eager")
        opt_fn(dt)

        with self.assertRaisesRegex(
            RuntimeError, "Recompilation triggered with skip_guard_eval_unsafe stance"
        ):
            with torch.compiler.set_stance(skip_guard_eval_unsafe=True):
                x = torch.randn(4, 4)
                dt["x"] = x
                opt_fn(dt)

    def test_cache_line_pickup(self):
        def fn(x, a=None, b=None):
            x = x * 3
            if a:
                x = x * 5
            if b:
                x = x * 7
            return x

        opt_fn = torch.compile(fn, backend="eager")
        x = torch.ones(4)

        ref1 = opt_fn(x, a=None, b=None)
        ref2 = opt_fn(x, a=1, b=None)
        ref3 = opt_fn(x, a=1, b=1)

        with torch.compiler.set_stance(skip_guard_eval_unsafe=True):
            res1 = opt_fn(x, a=None, b=None)
            res2 = opt_fn(x, a=1, b=None)
            res3 = opt_fn(x, a=1, b=1)

        self.assertEqual(ref1, res1)
        self.assertEqual(ref2, res2)
        self.assertEqual(ref3, res3)


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests

    run_tests()

torch/_C/_dynamo/eval_frame.pyi

Lines changed: 1 addition & 0 deletions
@@ -12,6 +12,7 @@ skip_code_recursive_flag: SkipCodeRecursiveFlag
 cache_limit_hit_flag: CacheLimitHitFlag

 def set_eval_frame(callback: DynamoCallback) -> DynamoCallback: ...
+def set_skip_guard_eval_unsafe(value: bool) -> bool: ...
 def get_eval_frame_callback() -> DynamoCallback: ...
 def reset_code(code: types.CodeType) -> None: ...
 def unsupported(obj1: object, obj2: object) -> object: ...

torch/_dynamo/decorators.py

Lines changed: 8 additions & 2 deletions
@@ -97,11 +97,17 @@ class set_stance(_DecoratorContextManager):

     _dynamo_forbidden = True

-    def __init__(self, stance: str, force_backend=None) -> None:
+    def __init__(
+        self,
+        stance: str = "default",
+        *,
+        skip_guard_eval_unsafe: bool = False,
+        force_backend=None,
+    ) -> None:
         if force_backend is not None and stance != "default":
             raise RuntimeError("non-default stance cannot have force_backend set")

-        self.stance = DynamoStance(stance, force_backend)
+        self.stance = DynamoStance(stance, skip_guard_eval_unsafe, force_backend)
         self.prev = _set_stance(self.stance)

     def __call__(self, fn):
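Since `stance` now defaults to `"default"` and the new flag is keyword-only, it composes with the existing calling conventions of `set_stance`. A small sketch of the context-manager and plain-call forms (the compiled function here is illustrative):

    import torch

    @torch.compile(backend="eager")
    def opt_fn(x):
        return x + 1

    x = torch.randn(4)
    opt_fn(x)  # warm-up

    # As a context manager, as the tests do:
    with torch.compiler.set_stance(skip_guard_eval_unsafe=True):
        opt_fn(x)

    # As a plain call, paired with an explicit reset afterwards:
    torch.compiler.set_stance("default", skip_guard_eval_unsafe=True)
    opt_fn(x)
    torch.compiler.set_stance("default")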

torch/_dynamo/eval_frame.py

Lines changed: 18 additions & 0 deletions
@@ -52,6 +52,7 @@
 from torch._C._dynamo.eval_frame import (  # noqa: F401
     reset_code,
     set_guard_error_hook,
+    set_skip_guard_eval_unsafe,
     skip_code,
     unsupported,
 )

@@ -122,6 +123,7 @@ def _maybe_set_eval_frame(callback: DynamoCallback):
 @dataclass
 class DynamoStance:
     stance: str = "default"
+    skip_guard_eval_unsafe: bool = False
     backend: Union[str, Callable[..., Any], None] = None


@@ -183,6 +185,10 @@ def fail_callback(*args, **kwargs):
     raise RuntimeError(f"invalid torch.compile stance '{_stance}'")


+def _is_skip_guard_eval_unsafe_stance():
+    return _stance.skip_guard_eval_unsafe
+
+
 def _reset_guarded_backend_cache():
     global cached_backends
     for backend in cached_backends.values():

@@ -446,10 +452,14 @@ def __enter__(self):
         )
         self.cleanup_fns = [enter() for enter in self.enter_exit_hooks]
         self.prior = _maybe_set_eval_frame(_callback_from_stance(self.callback))
+        self.prior_skip_guard_eval_unsafe = set_skip_guard_eval_unsafe(
+            _is_skip_guard_eval_unsafe_stance()
+        )

     def __exit__(self, exc_type, exc_val, exc_tb):
         assert self.prior is not unset
         _maybe_set_eval_frame(self.prior)
+        set_skip_guard_eval_unsafe(self.prior_skip_guard_eval_unsafe)
         self.prior = unset
         for cleanup in self.cleanup_fns:
             cleanup()

@@ -541,6 +551,9 @@ def _fn(*args, **kwargs):

             cleanups = [enter() for enter in self.enter_exit_hooks]
             prior = _maybe_set_eval_frame(_callback_from_stance(callback))
+            prior_skip_guard_eval_unsafe = set_skip_guard_eval_unsafe(
+                _is_skip_guard_eval_unsafe_stance()
+            )

             # Ensure that if an assertion occurs after graph pushes
             # something onto the DynamicLayerStack then we pop it off (the

@@ -561,6 +574,7 @@ def _fn(*args, **kwargs):
                 )

                 _maybe_set_eval_frame(prior)
+                set_skip_guard_eval_unsafe(prior_skip_guard_eval_unsafe)
                 for cleanup in cleanups:
                     cleanup()

@@ -717,10 +731,14 @@ def __call__(self, fn):
         @functools.wraps(fn)
         def _fn(*args, **kwargs):
             prior = _maybe_set_eval_frame(_callback_from_stance(self.callback))
+            prior_skip_guard_eval_unsafe = set_skip_guard_eval_unsafe(
+                _is_skip_guard_eval_unsafe_stance()
+            )
             try:
                 return fn(*args, **kwargs)
             finally:
                 _maybe_set_eval_frame(prior)
+                set_skip_guard_eval_unsafe(prior_skip_guard_eval_unsafe)

         _fn._torchdynamo_disable = True  # type: ignore[attr-defined]

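Every entry point above follows the same save/restore idiom: `set_skip_guard_eval_unsafe` returns the previous value, which is restored on exit so nested stances behave correctly. A simplified, self-contained sketch of that pattern (the module-level `_flag` is a stand-in for the real interpreter-side state, not part of the commit):

    _flag = False  # stand-in for the interpreter-side flag

    def set_skip_guard_eval_unsafe_sketch(value: bool) -> bool:
        # Set the flag and return the previous value so callers can restore it.
        global _flag
        prior, _flag = _flag, value
        return prior

    def call_under_stance(fn, stance_value, *args):
        prior = set_skip_guard_eval_unsafe_sketch(stance_value)
        try:
            return fn(*args)
        finally:
            # Restoring the prior value (rather than hard-coding False)
            # keeps nested uses well-behaved.
            set_skip_guard_eval_unsafe_sketch(prior)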
torch/_dynamo/guards.py

Lines changed: 13 additions & 0 deletions
@@ -253,6 +253,16 @@ def finalize(self):
     def populate_diff_guard_manager(self):
         self.diff_guard_root = self.clone_with_chosen_sources(self.diff_guard_sources)

+        # Ensure that the C++ side points to the updated diff guard manager.
+        # When a new GuardManagerWrapper is created, it does not have a
+        # cache_entry attribute, so it relies on the CacheEntry constructor to
+        # set the diff_guard_root in C++. But once it is saved in the Dynamo
+        # cache, the C++ side adds a cache_entry attribute. On recompiles, this
+        # cache_entry is visible, so we update the C++ side to point to the
+        # updated guard manager.
+        if self.cache_entry:
+            self.cache_entry.update_diff_guard_root_manager()
+
     def clone_with_chosen_sources(self, chosen_sources):
         def filter_fn(node_mgr):
             return node_mgr.get_source() in chosen_sources

@@ -2205,6 +2215,9 @@ def __init__(self, reason):
         super().__init__()
         self.invalidation_reason = reason

+    def populate_diff_guard_manager(self):
+        self.diff_guard_root = None
+

 # NB: Naively, you'd expect this to only be a function that produces
 # the callable that constitutes the guard. However, there is some
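`populate_diff_guard_manager` rebuilds a pruned guard tree that keeps only the sources that have actually differed across recompiles, so steady-state checking touches far fewer guards. A toy sketch of that pruning idea (`GuardNode` is a hypothetical stand-in for the real guard manager classes):

    from dataclasses import dataclass, field
    from typing import Optional

    @dataclass
    class GuardNode:
        source: str
        children: list = field(default_factory=list)

    def clone_with_chosen_sources(node: GuardNode, chosen: set) -> Optional[GuardNode]:
        # Keep a node only if its source was chosen; prune everything else.
        if node.source not in chosen:
            return None
        kept = []
        for child in node.children:
            cloned = clone_with_chosen_sources(child, chosen)
            if cloned is not None:
                kept.append(cloned)
        return GuardNode(node.source, kept)

The pruned root plays the role of `diff_guard_root`: a much smaller tree that re-checks only the conditions known to distinguish cache entries.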

torch/compiler/__init__.py

Lines changed: 23 additions & 2 deletions
@@ -230,7 +230,9 @@ def disable(fn=None, recursive=True):
     return torch._dynamo.disable(fn, recursive)


-def set_stance(stance: str, force_backend=None):
+def set_stance(
+    stance: str = "default", *, skip_guard_eval_unsafe=False, force_backend=None
+):
     """
     Set the current stance of the compiler.
     Can be used as a function, context manager, or decorator.

@@ -270,12 +272,31 @@ def bar():
           If there is cached compiled code valid for the input, it will still be used.
         - "fail_on_recompile": Raise an error when recompiling a function.

+        skip_guard_eval_unsafe: A flag to run only differentiating guards.
+          CAUTION - This flag is unsafe and should only be used if your setup
+          meets the following conditions.
+
+          torch.compile uses a guard system to support recompilations and
+          choose which compiled artifact to run at runtime. These guards,
+          though efficient, add some overhead, which may impact performance in
+          scenarios where you need to optimize for minimal guard processing
+          time. This API enables you to disable guard evaluation, assuming
+          that you have warmed up the compiled model with a sufficient variety
+          of inputs. This assumption means that, after the warmup phase, no
+          further recompilations will be necessary. If this assumption fails,
+          there is a risk of silently producing incorrect results (hence the
+          term "unsafe" in the API name).
+
         force_backend: If `stance` is "default", this argument can be used to force `torch.compile`
             to use a specific backend. Otherwise, an error is raised.
     """
     import torch._dynamo

-    return torch._dynamo.set_stance(stance, force_backend=force_backend)
+    return torch._dynamo.set_stance(
+        stance,
+        skip_guard_eval_unsafe=skip_guard_eval_unsafe,
+        force_backend=force_backend,
+    )


 # forbid in graph
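Not every violation of the warm-up assumption is silent, though: as `test_fail_on_tensor_shape_change` above shows, a tensor shape change that would force a recompile is detected under the stance and raises instead of returning a wrong result. A condensed version of that scenario:

    import torch

    opt_fn = torch.compile(lambda d: d["x"] + 1, backend="eager")
    opt_fn({"x": torch.randn(4)})  # warm up with a 1-D input only

    with torch.compiler.set_stance(skip_guard_eval_unsafe=True):
        # A 2-D input needs a recompile, which the stance forbids:
        # RuntimeError: Recompilation triggered with skip_guard_eval_unsafe stance
        opt_fn({"x": torch.randn(4, 4)})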

torch/csrc/dynamo/cache_entry.cpp

Lines changed: 7 additions & 0 deletions
@@ -18,6 +18,8 @@ CacheEntry::CacheEntry(const py::handle& guarded_code, PyObject* backend)
   }
   this->root_mgr = torch::dynamo::convert_to_root_guard_manager(
       this->guard_manager.attr("root"));
+  this->diff_guard_root_mgr = torch::dynamo::convert_to_root_guard_manager(
+      this->guard_manager.attr("diff_guard_root"));
 }

 C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(

@@ -52,6 +54,11 @@ void CacheEntry::invalidate(py::object deleted_guard_manager) {
   this->trace_annotation = "Invalidated";
 }

+void CacheEntry::update_diff_guard_root_manager() {
+  this->diff_guard_root_mgr = torch::dynamo::convert_to_root_guard_manager(
+      this->guard_manager.attr("diff_guard_root"));
+}
+
 PyCodeObject* CacheEntry_get_code(CacheEntry* e) {
   return (PyCodeObject*)e->code.ptr();
 }

torch/csrc/dynamo/cache_entry.h

Lines changed: 4 additions & 0 deletions
@@ -50,6 +50,8 @@ typedef struct VISIBILITY_HIDDEN CacheEntry {
   py::object compile_id;
   // root guard manager if exists
   void* root_mgr{nullptr};
+  // diff guard root guard manager if exists
+  void* diff_guard_root_mgr{nullptr};
   // backend used to create this cache entry
   PyObject* backend{nullptr};
   // Reference to owning ExtraState

@@ -70,6 +72,8 @@ typedef struct VISIBILITY_HIDDEN CacheEntry {
   py::object next();

   void invalidate(py::object deleted_guard_manager);
+  // Called from the Python side to update the diff guard root manager
+  void update_diff_guard_root_manager();
 } CacheEntry;
 C10_DIAGNOSTIC_POP()
 C10_DIAGNOSTIC_POP()
