Commit d36b83b

Stop immediately specializing common constants 0/1 for plain int
Fixes #128319
Signed-off-by: Edward Z. Yang <ezyang@meta.com>
ghstack-source-id: c431efc
Pull Request resolved: #128327
1 parent: 25fcb1c

7 files changed (+42, -16 lines)

docs/source/torch.compiler_dynamo_deepdive.rst

Lines changed: 8 additions & 5 deletions
@@ -598,16 +598,19 @@ This is mostly useful for debugging purposes.
 0, 1 are always specialized
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Regardless of whether we mark a dimension as dynamic, or we have traced
-an integer as dynamic, if we pass an input where that dimension is 0 or
-1, Dynamo will trace it as non-dynamic and it will generate a specific
-graph for it. This is the reason why in the example above we find guards
-of the form ``2 <= L['a'].size()[0]``.
+Regardless of whether we mark a dimension as dynamic, if we pass an input
+where that dimension is 0 or 1, Dynamo will trace it as non-dynamic and it
+will generate a specific graph for it. This is the reason why in the example
+above we find guards of the form ``2 <= L['a'].size()[0]``.
 
 There are several reasons for this choice. There are two particularly
 important - A tensor is empty if and only if any of its dimensions is
 zero - A tensor can only be contiguous if one of the strides is one
 
+This policy decision does NOT apply to plain Python ints; if we think a Python
+int should be compiled dynamically, we won't specialize it by default;
+instead, whether or not it gets specialized depends on its usage.
+
 Duck shaping
 ^^^^^^^^^^^^
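As a concrete illustration of the documented policy (a minimal sketch, not part of this commit): a dimension still specializes when it observes size 0 or 1 even under `dynamic=True`, which is where guards like `2 <= L['a'].size()[0]` come from, while a plain Python int is now left alone until its usage decides.

import torch

@torch.compile(dynamic=True)
def f(a):
    return a * 2

f(torch.randn(4))  # compiled with a symbolic size s0 and a guard 2 <= s0
f(torch.randn(8))  # reuses the dynamic graph
f(torch.randn(1))  # size 1 still hits the 0/1 rule: a second, specialized graph
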
test/dynamo/test_backward_higher_order_ops.py

Lines changed: 5 additions & 2 deletions
@@ -192,17 +192,20 @@ def fn(x, y):
             actual,
             """\
 class GraphModule(torch.nn.Module):
-    def forward(self, L_inputs_ : list):
+    def forward(self, L_inputs_ : list, L_hooks_0_keywords_fn_keywords_obj_counter: "Sym(s1)"):
         l_inputs_ = L_inputs_
+        l_hooks_0_keywords_fn_keywords_obj_counter = L_hooks_0_keywords_fn_keywords_obj_counter
 
         getitem: "f32[s0]" = l_inputs_[0]; l_inputs_ = None
 
         new_grad: "f32[s0]" = torch.clone(getitem)
 
+        add: "Sym(s1 + 1)" = l_hooks_0_keywords_fn_keywords_obj_counter + 1; l_hooks_0_keywords_fn_keywords_obj_counter = None
+
         result: "f32[s0]" = getitem * getitem; getitem = None
 
         new_grad_1: "f32[s0]" = torch.clone(result); result = None
-        return (new_grad, new_grad_1)
+        return (new_grad, new_grad_1, add)
 """,
         )
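
The expected graph gains a `Sym(s1)` input and an extra output because the hook's captured counter is no longer burned in as a constant. A hypothetical setup in the spirit of this test (names are illustrative, not the test's actual body): a backward hook closes over an object whose int field it increments, and that int now flows through the graph symbolically.

import torch

class Obj:
    def __init__(self):
        self.counter = 0  # previously specialized at 0; now traced as Sym(s1)

obj = Obj()

def hook(grad, *, obj=obj):
    obj.counter += 1  # appears in the graph as s1 + 1 and is returned as an output
    return torch.clone(grad)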

test/dynamo/test_misc.py

Lines changed: 6 additions & 3 deletions
@@ -2651,9 +2651,12 @@ def fn(x: int, y: torch.Tensor):
         ref = fn(x, y)
         res = opt_fn(x, y)
         self.assertEqual(ref, res)
-        # It's all traced once with x = 1, x = 2 and then x = ks0
-        # For dynamic it's x=1 and x=ks0
-        self.assertEqual(cnts.frame_count, ifdynstaticdefault(3, 2))
+        # It's all traced once with x = 1 and then x = ks0
+        # For dynamic it's x=ks0
+        if torch._dynamo.config.assume_static_by_default:
+            self.assertExpectedInline(str(cnts.frame_count), """2""")
+        else:
+            self.assertExpectedInline(str(cnts.frame_count), """2""")
 
     def test_numpy_with_builtin_type(self):
         x = np.random.rand(5)
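
For context, a standalone sketch of how these counts are measured (illustrative, not the test's actual body): `CompileCounter` bumps `frame_count` once per compiled graph, so under `assume_static_by_default` the first int value is specialized and the second distinct value triggers exactly one recompile with a symbolic int.

import torch
from torch._dynamo.testing import CompileCounter

cnts = CompileCounter()

@torch._dynamo.optimize(cnts)
def fn(x: int, y: torch.Tensor):
    return y * x

fn(1, torch.randn(3))  # first compile: x specialized to 1
fn(2, torch.randn(3))  # new value: one recompile with x as a symbolic int
assert cnts.frame_count == 2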

test/dynamo/test_modules.py

Lines changed: 4 additions & 1 deletion
@@ -1238,7 +1238,10 @@ def test_self_mutating1(self):
         out4 = [opt_m4(i), opt_m4(i), opt_m4(i)]
         self.assertTrue(torch._dynamo.testing.same(out2, out3))
         self.assertTrue(torch._dynamo.testing.same(out2, out4))
-        self.assertEqual(cnt.frame_count, 3)
+        if torch._dynamo.config.assume_static_by_default:
+            self.assertExpectedInline(cnt.frame_count, """2""")
+        else:
+            self.assertExpectedInline(cnt.frame_count, """1""")
 
     @patch.object(torch._dynamo.config, "raise_on_ctx_manager_usage", False)
     def test_generation_tag(self):
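
The lower counts follow directly from the 0/1 change. A hypothetical module in the spirit of test_self_mutating1 (not its actual body): an int attribute mutated on every call walks through 0, 1, 2, ..., so previously 0 and 1 each forced their own specialized frame, three frames by the third call, whereas now the second distinct value already flips the int to dynamic.

import torch

class SelfMutating(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.counter = 0

    def forward(self, x):
        self.counter += 1  # takes a new value on every call
        return x * self.counter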

test/dynamo/test_repros.py

Lines changed: 6 additions & 3 deletions
@@ -4350,7 +4350,7 @@ def fn(x, y):
         opt_fn = torch._dynamo.optimize(cnt, nopython=True)(fn)
         x = torch.rand([2, 2])
         opt_fn(x, x)
-        self.assertEqual(cnt.frame_count, 1)
+        self.assertExpectedInline(cnt.frame_count, """1""")
 
     @torch._dynamo.config.patch(capture_scalar_outputs=True)
     def test_unbacked_arange_in_bounds(self):
@@ -4419,7 +4419,7 @@ def fn(x, y):
         opt_fn = torch._dynamo.optimize(cnt, nopython=True)(fn)
         x = torch.rand([2, 2])
         self.assertEqual(opt_fn(x, [5]), fn(x, [5]))
-        self.assertEqual(cnt.frame_count, 1)
+        self.assertExpectedInline(cnt.frame_count, """1""")
 
     def test_user_ctor_ctx_manager_custom_init_graph_break(self):
         counter = [0]
@@ -4447,7 +4447,10 @@ def fn(x, counter):
         for i in range(0, 10):
             opt_fn(x, counter)
         self.assertEqual(counter[0], 12)
-        self.assertEqual(cnt.frame_count, torch._dynamo.utils.ifdynstaticdefault(3, 2))
+        if torch._dynamo.config.assume_static_by_default:
+            self.assertExpectedInline(cnt.frame_count, """2""")
+        else:
+            self.assertExpectedInline(cnt.frame_count, """1""")
 
     @unittest.expectedFailure
     def test_many_overlapping_inputs_does_not_explode_guards(self):
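
These hunks also swap `assertEqual` for `assertExpectedInline`, which keeps the expected count inline in the source so it can be re-recorded mechanically the next time behavior shifts. A minimal, self-contained illustration of that expecttest workflow (not code from this commit):

# Rerunning a failing test with EXPECTTEST_ACCEPT=1 rewrites the inline
# triple-quoted literal in place, so the new count shows up as a source diff.
from torch.testing._internal.common_utils import TestCase, run_tests

class ExampleTest(TestCase):
    def test_count(self):
        self.assertExpectedInline(str(1 + 1), """2""")

if __name__ == "__main__":
    run_tests()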

test/test_linalg.py

Lines changed: 2 additions & 0 deletions
@@ -2832,6 +2832,7 @@ def test_inverse_many_batches_helper(torch_inverse, b, n):
     @skipCPUIfNoLapack
     @onlyNativeDeviceTypes  # TODO: XLA doesn't raise exception
     @dtypes(*floating_and_complex_types())
+    @skipIfTorchDynamo("https://github.com/pytorch/pytorch/issues/129882")
     def test_inverse_errors(self, device, dtype):
         # inverse expects batches of square matrices as input
         with self.assertRaisesRegex(RuntimeError, "must be batches of square matrices"):
@@ -2976,6 +2977,7 @@ def test_pinv_errors_and_warnings(self, device, dtype):
     @skipCUDAIfNoMagmaAndNoCusolver
     @skipCPUIfNoLapack
     @dtypes(*floating_and_complex_types())
+    @skipIfTorchDynamo("https://github.com/pytorch/pytorch/issues/129882")
     def test_inv_errors_and_warnings(self, device, dtype):
         # inv expects batches of square matrices as input
         a = torch.randn(2, 3, 4, 3, dtype=dtype, device=device)

torch/_dynamo/variables/builder.py

Lines changed: 11 additions & 2 deletions
@@ -17,6 +17,8 @@
 import weakref
 from typing import Any, List, NamedTuple, Optional, Union
 
+from torch._utils_internal import justknobs_check
+
 from torch.utils._sympy.value_ranges import ValueRanges
 
 try:
@@ -1248,15 +1250,22 @@ def wrap_literal(self, value):
             # unspecializing int by default, but still
             # specialize for the following conditions
             if not TracingContext.get().force_unspec_int_unbacked_size_like and (
-                value in self._common_constants()
                 # Assume integers from global variables want to be specialized
-                or not self.source.guard_source().is_local()
+                not self.source.guard_source().is_local()
                 # Assume that integers that came from NN modules want to be
                 # specialized (as we don't expect users to be changing the
                 # NN modules on the fly)
                 or self.source.guard_source().is_nn_module()
                 or is_from_defaults(self.source)
                 or is_cell_contents(self.source)
+                # TODO: Delete this condition when rollout is done. NB: this
+                # condition never evaluates True in open source
+                or (
+                    not justknobs_check(
+                        "pytorch/dynamo:enable_unspecialize_zero_one_plain_int"
+                    )
+                    and value in self._common_constants()
+                )
             ):
                 self.install_guards(GuardBuilder.CONSTANT_MATCH)
                 return ConstantVariable.create(value=value, source=self.source)
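
Restated outside Dynamo's plumbing, the new decision reads roughly like the sketch below. This is a simplified paraphrase, not the real API: the predicate arguments stand in for the `self.source.guard_source()` queries, and `rollout_enabled` stands in for the `justknobs_check` call, which always returns True in open source, making the legacy 0/1 branch dead code there.

def should_specialize_plain_int(
    value: int,
    is_local: bool,            # did the int come from a local variable?
    is_nn_module: bool,        # attribute of an nn.Module
    from_defaults: bool,       # function default argument
    from_cell_contents: bool,  # captured closure cell
    rollout_enabled: bool = True,  # justknobs_check(...); always True in OSS
) -> bool:
    return (
        not is_local           # globals are assumed to want specialization
        or is_nn_module        # nn.Module ints are assumed static
        or from_defaults
        or from_cell_contents
        # legacy behavior, kept only while the internal rollout knob is off
        or (not rollout_enabled and value in (0, 1))
    )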
