Commit 12c257c

jerryzh168 authored and pytorchmergebot committed
[quant][pt2e] Support allow_implicit_sharing flag (#112929)
Summary: For a node node1 and an edge (node1, node2): since both observe the same Tensor, we may want to implicitly share observers. This flag allows people to turn off that behavior for the output of the node. See the test_allow_implicit_sharing test for a use case.

Test Plan: python test/test_quantization.py TestQuantizePT2E.test_allow_implicit_sharing

Pull Request resolved: #112929
Approved by: https://github.com/kimishpatel
1 parent 625958d commit 12c257c
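
For context, a minimal sketch of how a backend quantizer might use the new flag when annotating a node. Identifiers such as add_node and act_qspec are illustrative (borrowed from the test below); only the allow_implicit_sharing field is new in this PR:

    # Sketch: opt a node out of implicit observer sharing.
    # Assumes add_node is an aten.add.Tensor fx Node and act_qspec is a QuantizationSpec.
    add_node.meta["quantization_annotation"] = QuantizationAnnotation(
        input_qspec_map={
            add_node.args[0]: act_qspec,
            add_node.args[1]: act_qspec,
        },
        output_qspec=act_qspec,
        # keep this node's observers out of any implicitly inferred sharing group
        allow_implicit_sharing=False,
        _annotated=True,
    )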

File tree

5 files changed: +108 −16 lines


test/quantization/pt2e/test_quantize_pt2e.py

Lines changed: 77 additions & 0 deletions
@@ -1060,6 +1060,83 @@ def validate(self, model: torch.fx.GraphModule) -> None:
 
         self._test_transitive_sharing_with_cat_helper(BackendAQuantizer())
 
+    def test_allow_implicit_sharing(self):
+        """This tests the allow_implicit_sharing flag of QuantizationAnnotation, that is,
+        if a node is configured with allow_implicit_sharing=False, we will not have implicit sharing
+        for node and (node, consumer) even when they refer to the same Tensor
+
+        x1 -> add1 -----> add3
+        x2 -/            /
+        x3 -> add2 -----/
+        x4 -/
+
+        all add nodes have shared input and output, and the second input uses a shared quantization
+        spec pointing to the first input, but we set allow_implicit_sharing to False for all add
+        nodes, so the input and output of add1, add2 and add3 will each belong to their own sharing
+        group, and we'll have:
+
+        x1 -> obs1 -> add1 -> obs1 -> obs3 -> add3 -> obs3
+        x2 -> obs1 -/                        /
+        x3 -> obs2 -> add2 -> obs2 -> obs3 -/
+        x4 -> obs2 -/
+        """
+        # TODO: refactor this to a common util
+        class BackendAQuantizer(Quantizer):
+            def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
+                for node in model.graph.nodes:
+                    if node.target is torch.ops.aten.add.Tensor:
+                        add_node = node
+                        first_input_node = add_node.args[0]
+                        second_input_node = add_node.args[1]
+                        input_qspec_map = {}
+                        act_qspec = QuantizationSpec(
+                            dtype=torch.uint8,
+                            quant_min=0,
+                            quant_max=255,
+                            qscheme=torch.per_tensor_affine,
+                            is_dynamic=False,
+                            observer_or_fake_quant_ctr=observer.default_observer,
+                        )
+                        input_qspec_map[second_input_node] = act_qspec
+                        share_qparams_with_input_act1_qspec = SharedQuantizationSpec((second_input_node, add_node))
+                        input_qspec_map[first_input_node] = share_qparams_with_input_act1_qspec
+
+                        add_node.meta[
+                            "quantization_annotation"
+                        ] = QuantizationAnnotation(
+                            input_qspec_map=input_qspec_map,
+                            output_qspec=share_qparams_with_input_act1_qspec,
+                            allow_implicit_sharing=False,
+                            _annotated=True,
+                        )
+
+            def validate(self, model: torch.fx.GraphModule) -> None:
+                pass
+
+        m = TestHelperModules.ThreeAdd().eval()
+        example_inputs = (torch.randn(1, 3, 5, 5), torch.randn(1, 3, 5, 5), torch.randn(1, 3, 5, 5), torch.randn(1, 3, 5, 5))
+
+        # program capture
+        m = capture_pre_autograd_graph(
+            m,
+            example_inputs,
+        )
+        quantizer = BackendAQuantizer()
+        m = prepare_pt2e(m, quantizer)
+        m(*example_inputs)
+        observers = []
+        for n in m.graph.nodes:
+            if n.target == torch.ops.aten.add.Tensor:
+                input_obs1 = getattr(m, n.args[0].target)
+                input_obs2 = getattr(m, n.args[1].target)
+                output_obs = getattr(m, list(n.users)[0].target)
+                self.assertIs(input_obs1, input_obs2)
+                self.assertIs(input_obs1, output_obs)
+                observers.append(input_obs1)
+        assert len(observers) == 3
+        self.assertIsNot(observers[0], observers[1])
+        self.assertIsNot(observers[0], observers[2])
+        self.assertIsNot(observers[1], observers[2])
+
     def test_int16(self):
         class Int16ActQuantizer(Quantizer):
             def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:

test/quantization/pt2e/test_xnnpack_quantizer.py

Lines changed: 1 addition & 1 deletion
@@ -373,7 +373,7 @@ def test_propagate_annotation(self):
         ]:
             input_act = getattr(m, n.args[0].target)
             output_act = getattr(m, list(n.users)[0].target)
-            self.assertTrue(input_act is output_act)
+            self.assertIs(input_act, output_act)
 
         m = convert_pt2e(m, fold_quantize=True)
         node_occurrence = {

torch/ao/quantization/pt2e/prepare.py

Lines changed: 17 additions & 15 deletions
@@ -29,6 +29,7 @@
     "prepare",
 ]
 
+
 def _find_root(edge_or_node: EdgeOrNode, shared_with_map: Dict[EdgeOrNode, EdgeOrNode]) -> EdgeOrNode:
     """Find the root node for the sharing tree
     Args:
@@ -177,21 +178,22 @@ def _get_edge_or_node_to_group_id(edge_or_node_to_qspec: Dict[EdgeOrNode, Quanti
             # find root_qspec for `arg` Node (the output of previous node)
             assert isinstance(input_edge, tuple)
             arg, n = input_edge
-            arg_as_output_root_qspec = None
-            if arg in edge_or_node_to_qspec:
-                arg_as_output_qspec = edge_or_node_to_qspec[arg]
-                arg_as_output_root_qspec = _find_root_qspec(arg_as_output_qspec, edge_or_node_to_qspec, shared_with_map)
-            # TODO: add assertions for types of root qspecs
-            if (
-                arg_as_output_root_qspec is not None and
-                _has_same_dtype(arg_as_output_root_qspec, input_edge_root_qspec) and
-                _has_same_is_dynamic(arg_as_output_root_qspec, input_edge_root_qspec)
-            ):
-                # the input arg to the node should reuse the existing output observer for arg
-                # since dtype is the same (we may want to extend this to be a more strict check
-                # in the future)
-                # so we point from `input_edge` to `arg` (output of the argument)
-                _union(arg, input_edge, shared_with_map)
+            if n.meta["quantization_annotation"].allow_implicit_sharing:
+                arg_as_output_root_qspec = None
+                if arg in edge_or_node_to_qspec:
+                    arg_as_output_qspec = edge_or_node_to_qspec[arg]
+                    arg_as_output_root_qspec = _find_root_qspec(arg_as_output_qspec, edge_or_node_to_qspec, shared_with_map)
+                # TODO: add assertions for types of root qspecs
+                if (
+                    arg_as_output_root_qspec is not None and
+                    _has_same_dtype(arg_as_output_root_qspec, input_edge_root_qspec) and
+                    _has_same_is_dynamic(arg_as_output_root_qspec, input_edge_root_qspec)
+                ):
+                    # the input arg to the node should reuse the existing output observer for arg
+                    # since dtype is the same (we may want to extend this to be a more strict check
+                    # in the future)
+                    # so we point from `input_edge` to `arg` (output of the argument)
+                    _union(arg, input_edge, shared_with_map)
             _update_shared_with(input_edge, qspec, shared_with_map)
 
     # now that we get the sharing relations between all edges and nodes, we can assign group ids
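
Aside: the sharing bookkeeping that _find_root and _union maintain here is a standard union-find over EdgeOrNode keys. A simplified sketch of the pattern (hypothetical helper names, not the exact implementations in prepare.py):

    from typing import Dict, Hashable

    def find_root(key: Hashable, shared_with_map: Dict[Hashable, Hashable]) -> Hashable:
        # Follow parent pointers until reaching a key that maps to itself
        # (or is absent from the map, i.e. is its own group root).
        while shared_with_map.get(key, key) != key:
            key = shared_with_map[key]
        return key

    def union(parent_key: Hashable, child_key: Hashable, shared_with_map: Dict[Hashable, Hashable]) -> None:
        # Merge two sharing groups by pointing the child's root at the parent's root.
        shared_with_map[find_root(child_key, shared_with_map)] = find_root(parent_key, shared_with_map)

With the guard added above, the implicit union between an input edge (arg, n) and arg's output is skipped whenever n's annotation sets allow_implicit_sharing=False; the unconditional _update_shared_with call that handles explicitly requested sharing still runs.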

torch/ao/quantization/quantizer/quantizer.py

Lines changed: 6 additions & 0 deletions
@@ -114,6 +114,7 @@ class SharedQuantizationSpec(QuantizationSpecBase):
     Quantization spec for the Tensors whose quantization parameters are shared with other Tensors
     """
 
+    # the edge or node to share observer or fake quant instances with
     edge_or_node: EdgeOrNode
 
 
@@ -146,6 +147,11 @@ class QuantizationAnnotation:
     # TODO: change the value to QuantizationSpec in a separate PR
     output_qspec: Optional[QuantizationSpecBase] = None
 
+    # For a node node1 and an edge (node1, node2): since both observe the same
+    # Tensor, we may want to implicitly share observers. This flag allows people
+    # to turn off this behavior for the output of the node.
+    allow_implicit_sharing: bool = True
+
     # whether the node is annotated or not
     _annotated: bool = False
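
A note on how the two mechanisms interact: allow_implicit_sharing only gates the sharing that the prepare pass infers automatically, while explicit sharing is still requested via SharedQuantizationSpec, as in the test above (identifiers borrowed from test_allow_implicit_sharing):

    # Explicitly share quantization parameters with the (second_input_node, add_node)
    # input edge, regardless of the allow_implicit_sharing setting.
    share_qparams_with_input_act1_qspec = SharedQuantizationSpec((second_input_node, add_node))
    input_qspec_map[first_input_node] = share_qparams_with_input_act1_qspec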

torch/testing/_internal/common_quantization.py

Lines changed: 7 additions & 0 deletions
@@ -2585,6 +2585,13 @@ def forward(self, x1, x2, x3, x4):
         w = torch.cat([z, y])
         return w
 
+class ThreeAdd(torch.nn.Module):
+    def forward(self, x1, x2, x3, x4):
+        y = x1 + x2
+        z = x3 + x4
+        w = y + z
+        return w
+
 class EmbeddingModule(torch.nn.Module):
     def __init__(self):
         super().__init__()
