[nnc] Use a descriptive name for fused kernels when profiling (#66990)

bertmaher · facebook-github-bot · commit bdb889aca194 · 2021-10-21T00:06:23.000-07:00
Summary: Pull Request resolved: #66990 NNC fusion groups currently show up as "TensorExpr" in the profiler, which is accurate but not very useful, since it obscures what is actually happening inside the fusion group. This change logs them as `fused_XXX`, where XXX is a (length-limited) series of ops describing the subgraph; for instance, `fused_mul_add` represents a group containing `aten::mul` and `aten::add`. Test Plan: New unit test to check the output of the autograd profiler. Reviewed By: dzhulgakov Differential Revision: D31762087 fbshipit-source-id: 3fadbdc67b054faa01aa42e5b6ea2c4a6bc3481f
diff --git a/test/test_jit_fuser_te.py b/test/test_jit_fuser_te.py
@@ -1962,6 +1962,18 @@ def bn_neither(i, x):
             for fn in [bn, bn_no_weight, bn_no_bias, bn_neither]:
                 test(fn, (i, x))
 
+    def test_profiler(self):
+        @torch.jit.script
+        def test(x, y, z):
+            return x * y + z
+
+        args = [torch.randn(4) for _ in range(3)]
+        with torch.autograd.profiler.profile() as prof:
+            for _ in range(3):
+                test(*args)
+        self.assertIn("fused_mul_add", prof.table())
+
+
 works_list = [
     '__radd__',
     '__rdiv__',
diff --git a/torch/csrc/jit/passes/tensorexpr_fuser.cpp b/torch/csrc/jit/passes/tensorexpr_fuser.cpp
@@ -1302,7 +1302,7 @@ Operation createTensorExprOp(const Node* node) {
   auto kernel =
       std::make_shared<tensorexpr::TensorExprKernel>(node->g(attr::Subgraph));
   return [kernel](Stack& stack) {
-    RECORD_FUNCTION("TensorExpr", std::vector<c10::IValue>());
+    RECORD_FUNCTION(kernel->getKernelName(), std::vector<c10::IValue>());
     kernel->run(stack);
     return 0;
   };
diff --git a/torch/csrc/jit/tensorexpr/kernel.h b/torch/csrc/jit/tensorexpr/kernel.h
@@ -122,6 +122,10 @@ class TORCH_API TensorExprKernel {
     return bufferArgs_;
   }
 
+  const std::string& getKernelName() const {
+    return codegen_->kernel_func_name();
+  }
+
  private:
   enum BackendType {
     kUninitialized,