pytorch · kimishpatel · Jun 1, 2020 · Jun 1, 2020 · Jun 15, 2020 · Jun 15, 2020
diff --git a/test/test_jit.py b/test/test_jit.py
@@ -546,6 +546,126 @@ def f(x, y):
         self.run_pass('dce', trace.graph)
         FileCheck().check_not("type_as").run(str(trace.graph))
 
+    def test_add_relu_fusion(self):
+        # Each case scripts a module, runs the add+relu fusion pass on its graph,
+        # round-trips it through save/load, then checks the graph and the numerics.
+        class M(torch.nn.Module):
+            def __init__(self, relu_op):
+                super(M, self).__init__()
+                self.relu_op = relu_op
+
+            def forward(self, a, b, c):
+                tmp = torch.add(a, b)
+                x = self.relu_op(tmp)
+                d = torch.add(a, c)
+                return x + d
+        # add, relu
+        a = torch.rand((7, 11))
+        # torch.rand samples from [0, 1); rescale so relu sees negative inputs too
+        a = a * -10
+        a = a + 5
+        b = torch.rand((7, 11))
+        c = torch.rand((7, 11))
+        m = torch.jit.script(M(torch.relu))
+        orig_res = m(a, b, c)
+        torch._C._jit_pass_fuse_add_relu(m.graph)
+        buffer = io.BytesIO()
+        torch.jit.save(m, buffer)
+        buffer.seek(0)
+        m = torch.jit.load(buffer)
+        new_res = m(a, b, c)
+        FileCheck().check_not("aten::relu(") \
+            .check("aten::add_relu(") \
+            .run(m.graph)
+        torch.testing.assert_allclose(orig_res, new_res)
+
+        # add, relu_
+        a = torch.rand((7, 11))
+        a = a * -10
+        a = a + 5
+        b = torch.rand((7, 11))
+        c = torch.rand((7, 11))
+        m = torch.jit.script(M(torch.relu_))
+        orig_res = m(a, b, c)
+        torch._C._jit_pass_fuse_add_relu(m.graph)
+        buffer = io.BytesIO()
+        torch.jit.save(m, buffer)
+        buffer.seek(0)
+        m = torch.jit.load(buffer)
+        new_res = m(a, b, c)
+        FileCheck().check_not("aten::relu_(") \
+            .check("aten::add_relu(") \
+            .run(m.graph)
+        torch.testing.assert_allclose(orig_res, new_res)
+
+        class Madd_(torch.nn.Module):
+            def __init__(self, relu_op):
+                super(Madd_, self).__init__()
+                self.relu_op = relu_op
+
+            def forward(self, a, b):
+                x = a.add_(b)
+                x = self.relu_op(x)
+                return x
+
+        # add_, relu_
+        a = torch.rand((7, 11))
+        a = a * -10
+        a = a + 5
+        b = torch.rand((7, 11))
+        # Because in place add_ will overwrite a
+        a_copy = a.clone()
+        m = torch.jit.script(Madd_(torch.relu_))
+        orig_res = m(a, b)
+        torch._C._jit_pass_fuse_add_relu(m.graph)
+        buffer = io.BytesIO()
+        torch.jit.save(m, buffer)
+        buffer.seek(0)
+        m = torch.jit.load(buffer)
+        new_res = m(a_copy, b)
+        FileCheck().check_not("aten::add_(") \
+            .check_not("aten::relu_(") \
+            .check("aten::add_relu_(") \
+            .run(m.graph)
+        torch.testing.assert_allclose(orig_res, new_res)
+        # Since add_relu_ does inplace mutation ensure
+        # a_copy is modified
+        torch.testing.assert_allclose(orig_res, a_copy)
+
+        class Madd_out(torch.nn.Module):
+            def __init__(self, relu_op):
+                super(Madd_out, self).__init__()
+                self.relu_op = relu_op
+
+            def forward(self, a, b):
+                x = torch.add(a, b, out=a)
+                x = self.relu_op(x)
+                return x
+
+        # add_out, relu_
+        a = torch.rand((7, 11))
+        a = a * -10
+        a = a + 5
+        b = torch.rand((7, 11))
+        # Because add with out=a will overwrite a
+        a_copy = a.clone()
+        m = torch.jit.script(Madd_out(torch.relu_))
+        orig_res = m(a, b)
+        torch._C._jit_pass_fuse_add_relu(m.graph)
+        buffer = io.BytesIO()
+        torch.jit.save(m, buffer)
+        buffer.seek(0)
+        m = torch.jit.load(buffer)
+        new_res = m(a_copy, b)
+        FileCheck().check_not("aten::add(") \
+            .check_not("aten::relu_(") \
+            .check("aten::add_relu(") \
+            .run(m.graph)
+        torch.testing.assert_allclose(orig_res, new_res)
+        # Since add_relu with out=a does inplace mutation ensure
+        # a_copy is modified
+        torch.testing.assert_allclose(orig_res, a_copy)
+
     @unittest.skipIf(GRAPH_EXECUTOR == ProfilingMode.SIMPLE, "Simple executor doesn't have shape information")
     def test_peephole_optimize_shape_ops(self):
         def test_input(func, input, result):

diff --git a/tools/build_variables.bzl b/tools/build_variables.bzl
@@ -172,6 +172,7 @@ core_sources_full = [
     "torch/csrc/jit/passes/fixup_trace_scope_blocks.cpp",
     "torch/csrc/jit/passes/freeze_module.cpp",
     "torch/csrc/jit/passes/fuse_linear.cpp",
+    "torch/csrc/jit/passes/fuse_relu.cpp",
     "torch/csrc/jit/passes/graph_fuser.cpp",
     "torch/csrc/jit/passes/graph_rewrite_helper.cpp",
     "torch/csrc/jit/passes/guard_elimination.cpp",

diff --git a/torch/csrc/jit/passes/fuse_relu.cpp b/torch/csrc/jit/passes/fuse_relu.cpp
@@ -0,0 +1,70 @@
+#include <torch/csrc/jit/passes/fuse_relu.h>
+#include <torch/csrc/jit/ir/ir.h>
+#include <torch/csrc/jit/ir/subgraph_matcher.h>
+#include <torch/csrc/jit/passes/subgraph_rewrite.h>
+
+namespace torch {
+namespace jit {
+
+namespace {
+// Registers the add+relu rewrite patterns below and runs them on the graph.
+void fuseAddReluImpl(std::shared_ptr<Graph>& graph) {
+  SubgraphRewriter rewriter;
+
+  std::string add_relu_0 = R"(
+    graph(%a, %b, %alpha):
+        %add_res = aten::add(%a, %b, %alpha)
+        %res = aten::relu(%add_res)
+        return (%res))";
+  std::string add_relu_fused = R"(
+    graph(%a, %b, %alpha):
+        %res = aten::add_relu(%a, %b, %alpha)
+        return (%res))";
+  rewriter.RegisterRewritePattern(add_relu_0, add_relu_fused);
+
+  std::string add_relu_1 = R"(
+    graph(%a, %b, %alpha):
+        %add_res = aten::add(%a, %b, %alpha)
+        %res = aten::relu_(%add_res)
+        return (%res))";
+  rewriter.RegisterRewritePattern(add_relu_1, add_relu_fused);
+
+  std::string add_inplace_relu_1 = R"(
+    graph(%a, %b, %alpha):
+        %add_res = aten::add_(%a, %b, %alpha)
+        %res = aten::relu_(%add_res)
+        return (%res))";
+  std::string add_inplace_relu_fused = R"(
+    graph(%a, %b, %alpha):
+        %res = aten::add_relu_(%a, %b, %alpha)
+        return (%res))";
+  rewriter.RegisterRewritePattern(add_inplace_relu_1, add_inplace_relu_fused);
+
+  std::string add_out_relu = R"(
+    graph(%a, %b, %alpha, %out):
+        %add_res = aten::add(%a, %b, %alpha, %out)
+        %res = aten::relu_(%add_res)
+        return (%res))";
+  std::string add_out_relu_fused = R"(
+    graph(%a, %b, %alpha, %out):
+        %res = aten::add_relu(%a, %b, %alpha, %out)
+        return (%res))";
+
+  rewriter.RegisterRewritePattern(add_out_relu, add_out_relu_fused);
+
+  rewriter.runOnGraph(graph);
+  // NB: add_ + relu and add_out + relu are left out: fusing them would write
+  // add+relu into the tensor that add_/out must leave holding the plain sum.
+}
+} // namespace
+
+void FuseAddRelu(script::Module& module) {
+  auto graph = module.get_method("forward").graph(); // fuses "forward" only
+  fuseAddReluImpl(graph);
+}
+
+void FuseAddRelu(std::shared_ptr<Graph>& graph) {
+  fuseAddReluImpl(graph); // rewrites the given graph itself
+}
+} // namespace jit
+} // namespace torch
diff --git a/torch/csrc/jit/passes/fuse_relu.h b/torch/csrc/jit/passes/fuse_relu.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include <torch/csrc/jit/api/module.h>
+#include <torch/csrc/jit/ir/ir.h>
+
+namespace torch {
+namespace jit {
+TORCH_API void FuseAddRelu(script::Module& module); // fuses "forward" only
+TORCH_API void FuseAddRelu(std::shared_ptr<Graph>& graph); // fuses this graph
+} // namespace jit
+} // namespace torch
diff --git a/torch/csrc/jit/python/init.cpp b/torch/csrc/jit/python/init.cpp
@@ -21,6 +21,7 @@
 #include <torch/csrc/jit/passes/fold_conv_bn.h>
 #include <torch/csrc/jit/passes/freeze_module.h>
 #include <torch/csrc/jit/passes/fuse_linear.h>
+#include <torch/csrc/jit/passes/fuse_relu.h>
 #include <torch/csrc/jit/passes/graph_fuser.h>
 #include <torch/csrc/jit/passes/inline_fork_wait.h>
 #include <torch/csrc/jit/passes/inliner.h>
@@ -239,6 +240,9 @@ void initJITBindings(PyObject* module) {
           py::arg("module"),
           py::arg("preservedAttrs") = std::vector<std::string>())
       .def("_jit_pass_fuse_linear", &FuseLinear)
+      .def(
+          "_jit_pass_fuse_add_relu",
+          [](std::shared_ptr<Graph>& graph) { FuseAddRelu(graph); })
       .def("_jit_pass_dedup_module_uses", &DedupModuleUses)
       .def("_jit_pass_replicate_dequantize", &ReplicateDeQuant)
       .def(