
Commit e440c37

jerryzh168 authored and facebook-github-bot committed
[quant] Fix fuse linear pass (#40549)
Summary:
Pull Request resolved: #40549

Previously we did not check whether %weight_t is produced by `aten::t`, so the pass could fuse `matmul`/`addmm` patterns whose weight is not a transposed 2d tensor into `aten::linear`, which is incorrect.

Test Plan: Imported from OSS

Differential Revision: D22225921

fbshipit-source-id: 9723e82fdbac6d8e1a7ade22f3a9791321ab12b6
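To illustrate the case this commit guards against, here is a minimal sketch adapted from the new test below (assuming a PyTorch build that includes this fix): a 3d `matmul` whose weight is not produced by `aten::t` must stay an `aten::matmul` after the fuse pass runs.

```python
import torch

# Adapted from the test added in this commit: a matmul whose "weight" is a 3d
# tensor and is not produced by aten::t must not be rewritten to aten::linear,
# since aten::linear requires a 2d weight.
class Matmul(torch.nn.Module):
    def __init__(self, weight):
        super(Matmul, self).__init__()
        self.weight = weight

    def forward(self, x):
        return torch.matmul(x, self.weight)

x = torch.rand(5, 6, 5)
w = torch.rand(5, 5, 100)
model = torch.jit.trace(Matmul(w), [x])
torch._C._jit_pass_fuse_linear(model.graph)
print(model.graph)  # with the fix: aten::matmul is still present, no aten::linear
model(x)            # the rewritten graph still runs
```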
1 parent eae1ed9 commit e440c37

File tree

4 files changed: +46, -7 lines

test/quantization/test_quantize_jit.py
torch/csrc/jit/passes/fuse_linear.cpp
torch/csrc/jit/passes/graph_rewrite_helper.cpp
torch/csrc/jit/passes/quantization/insert_observers.cpp

test/quantization/test_quantize_jit.py

Lines changed: 29 additions & 2 deletions
@@ -309,7 +309,11 @@ def forward(self, x):
         x2 = torch.rand(5, 5)
         w2 = torch.rand(5, 5)
         b2 = torch.rand(5)
-        for has_bias, (x, weight, b) in itertools.product([True, False], [(x1, w1, b1), (x2, w2, b2)]):
+
+        x3 = torch.rand(5, 5, 5)
+        w3 = torch.rand(5, 5)
+        b3 = torch.rand(5)
+        for has_bias, (x, weight, b) in itertools.product([True, False], [(x1, w1, b1), (x2, w2, b2), (x3, w3, b3)]):
             bias = b if has_bias else None
             model = torch.jit.trace(FunctionalLinear(weight, bias), [x])
             torch._C._jit_pass_fuse_linear(model.graph)
@@ -319,6 +323,29 @@ def forward(self, x):
             for cn in check_not:
                 FileCheck().check_not(cn) \
                            .run(model.graph)
+            # make sure it runs
+            model(x)
+
+        # check matmuls are not fused
+        class Matmul(torch.nn.Module):
+            def __init__(self, weight):
+                super(Matmul, self).__init__()
+                self.weight = weight
+
+            def forward(self, x):
+                return torch.matmul(x, self.weight)
+
+        x = torch.rand(5, 6, 5)
+        w = torch.rand(5, 5, 100)
+        model = torch.jit.trace(Matmul(w), [x])
+        torch._C._jit_pass_fuse_linear(model.graph)
+        # check 3d matmul is not fused
+        FileCheck().check("aten::matmul") \
+                   .run(model.graph)
+        FileCheck().check_not("aten::linear") \
+                   .run(model.graph)
+        # make sure it runs
+        model(x)

     def test_insert_observers(self):
         class M(torch.nn.Module):
@@ -2672,7 +2699,7 @@ class TestQuantizeDynamicJitOps(QuantizationTestCase):
     for individual ops end to end.
     """
     @override_qengines
-    def test_quantized_linear_dynamic(self):
+    def test_linear(self):
         class FunctionalLinear(torch.nn.Module):
             def __init__(self, weight, bias):
                 super(FunctionalLinear, self).__init__()

torch/csrc/jit/passes/fuse_linear.cpp

Lines changed: 15 additions & 3 deletions
@@ -21,10 +21,21 @@ void FuseLinear(std::shared_ptr<Graph>& graph) {
     return is_int_constant(match, vmap, "beta", 1);
   };

+  // check %weight_t is produced by `aten::t` to make sure
+  // we can transform the pattern to `aten::linear`
+  auto weight_transposed =
+      [](const Match& match,
+         const std::unordered_map<std::string, Value*>& vmap) {
+        const auto& match_vmap = match.values_map;
+        auto v = match_vmap.at(vmap.at("weight_t"));
+        return v->node()->kind() == Symbol::aten("t");
+      };
+
   // replace addmm pattern to linear
   SubgraphRewriter addmm_to_linear;
   addmm_to_linear.RegisterRewritePattern(addmm_pattern, fused_linear_addmm);
-  addmm_to_linear.runOnGraph(graph, {aten_add_alpha_is_one, beta_is_one});
+  addmm_to_linear.runOnGraph(
+      graph, {aten_add_alpha_is_one, beta_is_one, weight_transposed});

   std::string matmul_add_pattern = R"IR(
     graph(%input, %weight_t, %bias, %alpha):
@@ -40,7 +51,8 @@ void FuseLinear(std::shared_ptr<Graph>& graph) {
   SubgraphRewriter matmuladd_to_linear;
   matmuladd_to_linear.RegisterRewritePattern(
       matmul_add_pattern, fused_linear_matmul);
-  matmuladd_to_linear.runOnGraph(graph, aten_add_alpha_is_one);
+  matmuladd_to_linear.runOnGraph(
+      graph, {aten_add_alpha_is_one, weight_transposed});

   std::string matmul_pattern = R"IR(
     graph(%input, %weight_t):
@@ -57,7 +69,7 @@ void FuseLinear(std::shared_ptr<Graph>& graph) {
   SubgraphRewriter matmul_to_linear;
   matmul_to_linear.RegisterRewritePattern(
       matmul_pattern, fused_linear_bias_none);
-  matmul_to_linear.runOnGraph(graph);
+  matmul_to_linear.runOnGraph(graph, weight_transposed);

   // clean up extra transpose for the weight of aten::linear
   std::string linear_weight_extra_transpose = R"IR(
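For contrast, a minimal sketch of the positive case (not part of this commit; `DecomposedLinearNoBias` is a hypothetical module): when the weight does flow through `aten::t`, the new `weight_transposed` filter accepts the match and the bias-free `matmul` pattern above can still be rewritten to `aten::linear`.

```python
import torch

# Hypothetical module, not from this PR: the weight is transposed via aten::t
# in the traced graph, so the weight_transposed filter should accept the match
# and the matmul pattern can still be fused into aten::linear.
class DecomposedLinearNoBias(torch.nn.Module):
    def __init__(self, weight):
        super(DecomposedLinearNoBias, self).__init__()
        self.weight = weight

    def forward(self, x):
        return torch.matmul(x, self.weight.t())

x = torch.rand(5, 5)
model = torch.jit.trace(DecomposedLinearNoBias(torch.rand(5, 5)), [x])
torch._C._jit_pass_fuse_linear(model.graph)
print(model.graph)  # expected: aten::linear (with None bias) instead of aten::t + aten::matmul
```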

torch/csrc/jit/passes/graph_rewrite_helper.cpp

Lines changed: 1 addition & 0 deletions
@@ -61,6 +61,7 @@ std::unordered_map<std::string, c10::IValue> getConvParams(
 }

 void replaceConvolutionWithAtenConv(std::shared_ptr<Graph>& graph) {
+  // TODO: remove constant prop in the pass
   ConstantPropagation(graph);
   std::string convolution = R"(
       graph(%a, %w, %b, %stride:int[], %padding:int[], %dilation:int[],

torch/csrc/jit/passes/quantization/insert_observers.cpp

Lines changed: 1 addition & 2 deletions
@@ -1107,10 +1107,9 @@ void InsertObserversHelper::preprocess(

   Method method = module.get_method(method_name);
   auto graph = method.graph();
-  // must do constant propagation first before replacement
-  replaceConvolutionWithAtenConv(graph);
   // fuse decomposed linear into aten::linear
   FuseLinear(graph);
+  replaceConvolutionWithAtenConv(graph);
 }

 void InsertObserversHelper::analyze(
