Skip to content

Commit 02bc06a

Browse files
wanchaol authored and facebook-github-bot committed
avoid kernel launches for zero-sized tensor inputs
Summary: Pull Request resolved: #22790

Test Plan: Imported from OSS

Differential Revision: D16226168

Pulled By: wanchaol

fbshipit-source-id: 081607c9acc1540c753b080c5f727dc4e8c22acc
1 parent b1b65f3 commit 02bc06a

File tree

2 files changed

+17
-2
lines changed

2 files changed

+17
-2
lines changed

test/test_jit_fuser.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,18 @@ def test_abs_cpu(self):
4646
def test_abs_cuda(self):
4747
self._test_fused_abs(device="cuda")
4848

49+
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
50+
@skipIfRocm
51+
def test_zero_element_tensors(self):
52+
def decode(sin_t, cos_t):
53+
theta = torch.atan2(sin_t.float(), cos_t.float())
54+
return theta
55+
56+
sin = torch.zeros(0, device="cuda")
57+
cos = torch.zeros(0, device="cuda")
58+
inputs = [sin, cos]
59+
ge = self.checkScript(decode, inputs)
60+
4961
@unittest.skipIf(not RUN_CUDA, "fuser requires CUDA")
5062
def test_arg_configurations_smoke_cuda(self):
5163
# A smoke test to make sure we won't use the same kernel for contiguous

torch/csrc/jit/fuser/executor.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -313,8 +313,11 @@ void launchFusion(
313313
}
314314
}
315315
}
316-
317-
fusion.launch_raw(numel, arguments);
316+
// Skip launching the kernel for zero-element tensor inputs
317+
// launches are skipped, empty zero-sized output is returned
318+
if (numel > 0) {
319+
fusion.launch_raw(numel, arguments);
320+
}
318321
}
319322

320323
bool runFusion(const int64_t key, Stack& stack, std::string* code_out) {

0 commit comments

Comments
 (0)