31 changes: 31 additions & 0 deletions docs/source/bottleneck.rst
@@ -0,0 +1,31 @@
torch.utils.bottleneck
======================

.. currentmodule:: torch.utils.bottleneck

`torch.utils.bottleneck` is a tool that can be used as an initial step for
debugging bottlenecks in your program. It summarizes runs of your script with
the Python profiler and PyTorch's autograd profiler.

Run it on the command line with

::

   python -m torch.utils.bottleneck -- /path/to/source/script.py [args]

where ``[args]`` are any number of arguments to ``script.py``. Run
``python -m torch.utils.bottleneck -h`` for more usage instructions.
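
For example, to profile a script that takes its own flags, put ``--``
before the script path so that bottleneck's own argument parsing stops
there (``train.py`` and ``--epochs`` below are placeholder names)::

   python -m torch.utils.bottleneck -- train.py --epochs 1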

.. warning::
   Because your script will be profiled, please ensure that it exits in a
   finite amount of time.

.. warning::
   Due to the asynchronous nature of CUDA kernels, when running against
   CUDA code, the cProfile output and the CPU-mode autograd profiler may
   not show correct timings: the reported CPU time measures kernel launch,
   not execution on the GPU. In this case, the CUDA-mode autograd profiler
   is better at assigning blame to the relevant operator(s).
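
As a point of reference, a minimal sketch of driving the autograd profiler
in CUDA mode directly (the tensors below are placeholders, and
``use_cuda=True`` assumes a CUDA-enabled build)::

   import torch

   x = torch.randn(8, 20).cuda()
   w = torch.randn(20, 20).cuda().requires_grad_()

   # Record both CPU-side calls and CUDA kernel times.
   with torch.autograd.profiler.profile(use_cuda=True) as prof:
       (x @ w).sum().backward()

   # Printing the profile renders a table of per-operator timings.
   print(prof)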


For more complicated uses of the profilers (such as in a multi-GPU case),
see https://docs.python.org/3/library/profile.html
or :func:`torch.autograd.profiler.profile` for more information.
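
For a quick standalone pass with just the Python profiler, something along
these lines works (a sketch using only the standard library; ``main`` and
the output filename are placeholders)::

   import cProfile
   import pstats

   def main():
       pass  # your workload here

   # Profile main() and dump the raw stats to a file.
   cProfile.run('main()', 'bottleneck.prof')

   # Load the stats and show the 10 most expensive calls by cumulative time.
   pstats.Stats('bottleneck.prof').sort_stats('cumulative').print_stats(10)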
1 change: 1 addition & 0 deletions docs/source/index.rst
@@ -39,6 +39,7 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs.
   data
   model_zoo
   onnx
   bottleneck

.. toctree::
   :glob:
4 changes: 4 additions & 0 deletions test/bottleneck/test.py
@@ -0,0 +1,4 @@
import torch

x = torch.ones((3, 3), requires_grad=True)
(3 * x).sum().backward()
13 changes: 13 additions & 0 deletions test/bottleneck/test_args.py
@@ -0,0 +1,13 @@
import argparse
import torch

if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    # Required args. Raises an error if they aren't passed.
    parser.add_argument('--foo', help='foo', required=True)
    parser.add_argument('--bar', help='bar', required=True)
    _ = parser.parse_args()

    x = torch.ones((3, 3), requires_grad=True)
    (3 * x).sum().backward()
27 changes: 27 additions & 0 deletions test/bottleneck/test_cuda.py
@@ -0,0 +1,27 @@
import torch
import torch.nn as nn


class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.linear = nn.Linear(20, 20)

    def forward(self, input):
        out = self.linear(input[:, 10:30])
        return out.sum()


def main():
    data = torch.randn(10, 50).cuda()
    model = Model().cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
    for i in range(10):
        optimizer.zero_grad()
        loss = model(data)
        loss.backward()
        optimizer.step()


if __name__ == '__main__':
    main()
100 changes: 100 additions & 0 deletions test/test_utils.py
@@ -1,6 +1,7 @@
from __future__ import print_function
import sys
import os
import re
import math
import shutil
import random
@@ -385,6 +386,105 @@ def _transform_MultiMarginCriterion(self, input, target):
        return input, target.sub(1)


class TestBottleneck(TestCase):
    def _run(self, command):
        """Returns (return-code, stdout, stderr)"""
        import subprocess
        from common import PY3

        p = subprocess.Popen(command, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, shell=True)
        output, err = p.communicate()
        rc = p.returncode
        if PY3:
            output = output.decode("ascii")
            err = err.decode("ascii")
        return (rc, output, err)

    def _run_bottleneck(self, test_file, scriptargs=''):
        import os
        curdir = os.path.dirname(os.path.abspath(__file__))
        filepath = '{}/{}'.format(curdir, test_file)
        if scriptargs != '':
            mark = '-- '
            scriptargs = ' {}'.format(scriptargs)
        else:
            mark = ''
        rc, out, err = self._run(
            'python -m torch.utils.bottleneck {}{}{}'.format(mark, filepath, scriptargs))
        return rc, out, err

    def _check_run_args(self):
        # Check that this fails due to missing args
        rc, out, err = self._run_bottleneck('bottleneck/test_args.py')
        self.assertEqual(rc, 2, None, self._fail_msg('Missing args should error', out + err))

        # This should succeed
        rc, out, err = self._run_bottleneck('bottleneck/test_args.py', '--foo foo --bar bar')
        self.assertEqual(rc, 0, None, self._fail_msg('Should pass args to script', out + err))

    def _fail_msg(self, msg, output):
        return '{}, output was:\n{}'.format(msg, output)

    def _check_environment_summary(self, output):
        results = re.search('Environment Summary', output)
        self.assertIsNotNone(results, self._fail_msg('Should have Environment Summary', output))

        # Up to five lines away from the heading, there should be the version number
        results = re.search(r'Environment Summary.*(\n.*){,5}\nPyTorch \d+\.\d+', output)
        self.assertIsNotNone(results, self._fail_msg('Should have PyTorch version', output))

    def _check_cprof_summary(self, output):
        results = re.search('cProfile output', output)
        self.assertIsNotNone(results, self._fail_msg('Should have cProfile output', output))

        # This assumes that after the cProfile output section we have
        # the autograd profiler output
        results = re.search(r'cProfile output.*(\n.*){6,50}\n.*autograd profiler output', output)
        self.assertIsNotNone(results, self._fail_msg(
            'Distance between cProfile and autograd prof out not in [6, 50] lines', output))

    def _check_autograd_summary(self, output):
        results = re.search('autograd profiler output', output)
        self.assertIsNotNone(results, self._fail_msg('Should have autograd profiler output', output))

        # This assumes that after the autograd profiler output is the end of the
        # output.
        results = re.search(r'autograd profiler output.*(\n.*){6,100}', output)
        self.assertIsNotNone(results, self._fail_msg(
            'Distance between autograd prof output and end of output not in [6, 100] lines', output))

    def _check_cuda(self, output):
        if torch.cuda.is_available():
            results = re.search('CUDA mode', output)
            self.assertIsNotNone(results, self._fail_msg('Should tell users CUDA', output))
        else:
            results = re.search('CUDA mode', output)
            self.assertIsNone(results, self._fail_msg('Should not tell users about CUDA', output))

    @unittest.skipIf(torch.cuda.is_available(), 'CPU-only test')
    def test_cpu_only(self):
        rc, out, err = self._run_bottleneck('bottleneck/test.py')
        self.assertEqual(rc, 0, 'Run failed with\n{}'.format(err))

        self._check_run_args()
        self._check_environment_summary(out)
        self._check_autograd_summary(out)
        self._check_cprof_summary(out)
        self._check_cuda(out)

    @unittest.skipIf(not torch.cuda.is_available(), 'No CUDA')
    def test_cuda(self):
        rc, out, err = self._run_bottleneck('bottleneck/test_cuda.py')
        self.assertEqual(rc, 0, 'Run failed with\n{}'.format(err))

        self._check_run_args()
        self._check_environment_summary(out)
        self._check_autograd_summary(out)
        self._check_cprof_summary(out)
        self._check_cuda(out)
class TestONNXUtils(TestCase):
    def test_prepare_onnx_paddings(self):
        sizes = [2, 3, 4]