48 changes: 48 additions & 0 deletions benchmarks/functional_autograd_benchmark/README.md
@@ -0,0 +1,48 @@
# Benchmarking tool for the autograd API

This folder contains a set of self-contained scripts that let you benchmark the autograd API with different common models.
It is designed so that you can run the benchmark before and after your change and generate a table to share on the PR.

To do so, use `functional_autograd_benchmark.py` to run the benchmarks before your change (writing the output to `before.txt`) and after your change (writing the output to `after.txt`).
You can then use `compare.py` to get a markdown table comparing the two runs.

In general, you should use the default arguments of `functional_autograd_benchmark.py`. You can override them (for example with `--gpu` or `--run-slow-tasks`) to force a given device or to run even the (very) slow settings.

### Sample usage

```bash
# Make sure you compile pytorch in release mode and with the same flags before/after
export DEBUG=0
# When running on CPU, it might be required to limit the number of cores to avoid oversubscription
export OMP_NUM_THREADS=10

# Compile pytorch with the base revision
git checkout master
python setup.py develop

# Run the benchmark for the base
# This will use the GPU if available.
pushd benchmarks/functional_autograd_benchmark
python functional_autograd_benchmark.py --output before.txt

# Compile pytorch with your change
popd
git checkout your_feature_branch
python setup.py develop

# Run the benchmark for the new version
pushd benchmarks/functional_autograd_benchmark
python functional_autograd_benchmark.py --output after.txt

# Get the markdown table that you can paste in your GitHub PR
python compare.py

popd

```

### Files in this folder:
- `functional_autograd_benchmark.py` is the main entry point to run the benchmark.
- `compare.py` is the entry point to run the comparison script that generates a markdown table.
- `torchaudio_models.py` and `torchvision_models.py` contain code extracted from torchaudio and torchvision so that the models can be run without requiring a specific version of these libraries to be installed.
- `ppl_models.py`, `vision_models.py` and `audio_text_models.py` contain all the getter functions used for the benchmark.
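
Each getter returns a `(forward, params)` pair: a forward function that takes the model parameters as inputs, has no side effects, and returns the loss used for the benchmark, together with the tuple of parameter tensors extracted from the model. Below is a minimal sketch of that contract using the `extract_weights`/`load_weights` helpers from `utils.py`; the model, data, and `get_toy_model` name are illustrative only and not part of the benchmark.

```python
import torch
from torch import nn, Tensor

from utils import extract_weights, load_weights, GetterReturnType

def get_toy_model(device: torch.device) -> GetterReturnType:
    # Hypothetical model: any nn.Module can be plugged in the same way.
    model = nn.Linear(10, 1).to(device)
    criterion = nn.MSELoss()
    params, names = extract_weights(model)

    inputs = torch.rand(16, 10, device=device)
    targets = torch.rand(16, 1, device=device)

    def forward(*new_params: Tensor) -> Tensor:
        # Reload the given parameters into the model before running it;
        # the forward itself must not have any other side effect.
        load_weights(model, names, new_params)
        return criterion(model(inputs), targets)

    return forward, params
```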
122 changes: 122 additions & 0 deletions benchmarks/functional_autograd_benchmark/audio_text_models.py
@@ -0,0 +1,122 @@
import torch
from torch import nn, Tensor

import torchaudio_models as models

from utils import extract_weights, load_weights, GetterReturnType

def get_wav2letter(device: torch.device) -> GetterReturnType:
N = 10
input_frames = 700
vocab_size = 28
model = models.Wav2Letter(num_classes=vocab_size)
criterion = torch.nn.NLLLoss()
model.to(device)
params, names = extract_weights(model)

inputs = torch.rand([N, 1, input_frames], device=device)
labels = torch.rand(N, 3, device=device).mul(vocab_size).long()

def forward(*new_params: Tensor) -> Tensor:
load_weights(model, names, new_params)
out = model(inputs)

loss = criterion(out, labels)
return loss

return forward, params

def get_deepspeech(device: torch.device) -> GetterReturnType:
sample_rate = 16000
window_size = 0.02
window = "hamming"
audio_conf = dict(sample_rate=sample_rate,
window_size=window_size,
window=window,
noise_dir=None)

N = 10
num_classes = 10
spectrogram_size = 161
    # The commented values are the original sizes used in the upstream code
seq_length = 500 # 1343
target_length = 10 # 50
labels = torch.rand(num_classes, device=device)
inputs = torch.rand(N, 1, spectrogram_size, seq_length, device=device)
# Sequence length for each input
inputs_sizes = torch.rand(N, device=device).mul(seq_length * 0.1).add(seq_length * 0.8)
targets = torch.rand(N, target_length, device=device)
targets_sizes = torch.full((N,), target_length, dtype=torch.int, device=device)

model = models.DeepSpeech(rnn_type=nn.LSTM, labels=labels, rnn_hidden_size=1024, nb_layers=5,
audio_conf=audio_conf, bidirectional=True)
model = model.to(device)
criterion = nn.CTCLoss()
params, names = extract_weights(model)

def forward(*new_params: Tensor) -> Tensor:
load_weights(model, names, new_params)
out, out_sizes = model(inputs, inputs_sizes)
out = out.transpose(0, 1) # For ctc loss

loss = criterion(out, targets, out_sizes, targets_sizes)
return loss

return forward, params

def get_transformer(device: torch.device) -> GetterReturnType:
    # For most SOTA research, you would want the embedding size to be 720, nhead to be 12, bsz to be 64, and tgt_len/src_len to be 128.
N = 64
seq_length = 128
ntoken = 50
model = models.TransformerModel(ntoken=ntoken, ninp=720, nhead=12, nhid=2048, nlayers=2)
model.to(device)
criterion = nn.NLLLoss()
params, names = extract_weights(model)

data = torch.rand(N, seq_length + 1, device=device).mul(ntoken).long()
inputs = data.narrow(1, 0, seq_length)
targets = data.narrow(1, 1, seq_length)

def forward(*new_params: Tensor) -> Tensor:
load_weights(model, names, new_params)
out = model(inputs)

loss = criterion(out.reshape(N * seq_length, ntoken), targets.reshape(N * seq_length))
return loss

return forward, params

def get_multiheadattn(device: torch.device) -> GetterReturnType:
# From https://github.com/pytorch/text/blob/master/test/data/test_modules.py#L10
embed_dim, nhead, tgt_len, src_len, bsz = 10, 5, 6, 10, 64
# Build torchtext MultiheadAttention module
in_proj = models.InProjContainer(torch.nn.Linear(embed_dim, embed_dim, bias=False),
torch.nn.Linear(embed_dim, embed_dim, bias=False),
torch.nn.Linear(embed_dim, embed_dim, bias=False))

model = models.MultiheadAttentionContainer(nhead, in_proj,
models.ScaledDotProduct(),
torch.nn.Linear(embed_dim, embed_dim, bias=False))
model.to(device)
params, names = extract_weights(model)

query = torch.rand((tgt_len, bsz, embed_dim), device=device)
key = value = torch.rand((src_len, bsz, embed_dim), device=device)
attn_mask_2D = torch.randint(0, 2, (tgt_len, src_len), device=device).to(torch.bool)
bias_k = bias_v = torch.rand((1, 1, embed_dim), device=device)

attn_mask = torch.stack([attn_mask_2D] * (bsz * nhead))
bias_k = bias_k.repeat(1, bsz, 1).reshape(1, bsz * nhead, -1)
bias_v = bias_v.repeat(1, bsz, 1).reshape(1, bsz * nhead, -1)
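    # The reshapes above expand the attention mask and biases to one copy per attention head (bsz * nhead).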

def forward(*new_params: Tensor) -> Tensor:
load_weights(model, names, new_params)
mha_output, attn_weights = model(query, key, value, attn_mask=attn_mask, bias_k=bias_k, bias_v=bias_v)

        # Don't test any specific loss, just backprop through the sum of both outputs
loss = mha_output.sum() + attn_weights.sum()

return loss

return forward, params
45 changes: 45 additions & 0 deletions benchmarks/functional_autograd_benchmark/compare.py
@@ -0,0 +1,45 @@
import argparse
from collections import defaultdict

from utils import to_markdown_table, from_markdown_table

def main():
parser = argparse.ArgumentParser("Main script to compare results from the benchmarks")
parser.add_argument("--before", type=str, default="before.txt", help="Text file containing the times to use as base")
parser.add_argument("--after", type=str, default="after.txt", help="Text file containing the times to use as new version")
parser.add_argument("--output", type=str, default="", help="Text file where to write the output")
args = parser.parse_args()

with open(args.before, "r") as f:
content = f.read()
res_before = from_markdown_table(content)

with open(args.after, "r") as f:
content = f.read()
res_after = from_markdown_table(content)

diff = defaultdict(defaultdict)
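    # diff maps model -> task -> (speedup, mean before, var before, mean after, var after);
    # when a (model, task) pair is missing from one of the two runs, the corresponding fields are left as None.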
for model in res_before:
for task in res_before[model]:
mean_before, var_before = res_before[model][task]
if task not in res_after[model]:
diff[model][task] = (None, mean_before, var_before, None, None)
else:
mean_after, var_after = res_after[model][task]
diff[model][task] = (mean_before / mean_after, mean_before, var_before, mean_after, var_after)
for model in res_after:
for task in res_after[model]:
if task not in res_before[model]:
mean_after, var_after = res_after[model][task]
diff[model][task] = (None, None, None, mean_after, var_after)

header = ("model", "task", "speedup", "mean (before)", "var (before)", "mean (after)", "var (after)")
out = to_markdown_table(diff, header=header)

print(out)
if args.output:
with open(args.output, "w") as f:
f.write(out)

if __name__ == "__main__":
main()
153 changes: 153 additions & 0 deletions benchmarks/functional_autograd_benchmark/functional_autograd_benchmark.py
@@ -0,0 +1,153 @@
import torch
from torch.autograd import functional

import time
from argparse import ArgumentParser
from collections import defaultdict
from typing import NamedTuple, Callable, List, Any

import ppl_models
import vision_models
import audio_text_models

from utils import to_markdown_table, TimingResultType, InputsType, GetterType, VType

# Listing of the different tasks
FAST_TASKS_NO_DOUBLE_BACK = [
"vjp",
]

FAST_TASKS = FAST_TASKS_NO_DOUBLE_BACK + [
"vhp",
"jvp",
]

ALL_TASKS = FAST_TASKS + [
"hvp",
"jacobian",
"hessian"
]

DOUBLE_BACKWARD_TASKS = ["jvp", "hvp", "vhp", "hessian"]
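# Each task name matches a function in torch.autograd.functional (looked up with
# getattr in run_once below). The tasks in DOUBLE_BACKWARD_TASKS require computing
# a double backward, which some models do not support (deepspeech is marked
# accordingly in MODELS below).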

# Model definition which contains:
# - name: a string with the model name.
# - getter: a function to get the model. It takes as input the device on which the model
# will run. It should return the forward function and the parameters (Tensors) used as
# input for the forward function. Note that the forward must *not* have any side effect.
# - tasks: the list of recommended tasks that can run in a reasonable amount of time with this model.
# - unsupported: the list of tasks that this model cannot run.
class ModelDef(NamedTuple):
name: str
getter: GetterType
tasks: List[str]
unsupported: List[str]

MODELS = [
ModelDef("resnet18", vision_models.get_resnet18, FAST_TASKS, []),
ModelDef("fcn_resnet", vision_models.get_fcn_resnet, FAST_TASKS, []),
ModelDef("detr", vision_models.get_detr, FAST_TASKS, []),
ModelDef("ppl_simple_reg", ppl_models.get_simple_regression, ALL_TASKS, []),
ModelDef("ppl_robust_reg", ppl_models.get_robust_regression, ALL_TASKS, []),
ModelDef("wav2letter", audio_text_models.get_wav2letter, FAST_TASKS, []),
ModelDef("deepspeech", audio_text_models.get_deepspeech, FAST_TASKS_NO_DOUBLE_BACK, DOUBLE_BACKWARD_TASKS),
ModelDef("transformer", audio_text_models.get_transformer, FAST_TASKS, []),
ModelDef("multiheadattn", audio_text_models.get_multiheadattn, FAST_TASKS, []),
]
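# Illustrative only: to benchmark a new model, add an entry such as
#   ModelDef("my_model", my_models.get_my_model, FAST_TASKS, []),
# where `my_models.get_my_model` is a hypothetical getter following the contract
# described above (a side-effect-free forward function plus the parameter tensors).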

def get_v_for(model: Callable, inp: InputsType, task: str) -> VType:
v: VType
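    # The auxiliary vector v must have the same shape as the model output for vjp
    # and the same shape(s) as the inputs for jvp/hvp/vhp; jacobian and hessian take no v.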

if task in ["vjp"]:
out = model(*inp)
v = torch.rand_like(out)
elif task in ["jvp", "hvp", "vhp"]:
if isinstance(inp, tuple):
v = tuple(torch.rand_like(i) for i in inp)
else:
v = torch.rand_like(inp)
else:
v = None

return v

def run_once(model: Callable, inp: InputsType, task: str, v: VType) -> None:
func = getattr(functional, task)

if v is not None:
res = func(model, inp, v=v, strict=True)
else:
res = func(model, inp, strict=True)

def run_model(model_getter: GetterType, args: Any, task: str) -> List[float]:
if args.gpu == -1:
device = torch.device("cpu")

def noop():
pass
do_sync = noop
else:
device = torch.device("cuda:{}".format(args.gpu))
do_sync = torch.cuda.synchronize

model, inp = model_getter(device)

v = get_v_for(model, inp, task)
# Warmup
run_once(model, inp, task, v)

elapsed = []
for it in range(args.num_iters):
do_sync()
start = time.time()
run_once(model, inp, task, v)
do_sync()
elapsed.append(time.time() - start)

return elapsed

def main():
    parser = ArgumentParser("Main script to benchmark the functional API of autograd.")
parser.add_argument("--output", type=str, default="", help="Text file where to write the output")
parser.add_argument("--num-iters", type=int, default=10)
parser.add_argument("--gpu", type=int, default=-2, help="GPU to use, -1 for CPU and -2 for auto-detect")
parser.add_argument("--run-slow-tasks", action="store_true", help="Run even the slow tasks")
parser.add_argument("--model-filter", type=str, default="", help="Only run the models in this filter")
parser.add_argument("--task-filter", type=str, default="", help="Only run the tasks in this filter")
parser.add_argument("--num-threads", type=int, default=10,
help="Number of concurrent threads to use when running on cpu")
parser.add_argument("--seed", type=int, default=0, help="The random seed to use.")
args = parser.parse_args()

results: TimingResultType = defaultdict(defaultdict)
torch.set_num_threads(args.num_threads)
torch.set_num_interop_threads(args.num_threads)

    # This automatically seeds CUDA if it is available
torch.manual_seed(args.seed)

if args.gpu == -2:
args.gpu = 0 if torch.cuda.is_available() else -1

for name, model_getter, recommended_tasks, unsupported_tasks in MODELS:
if args.model_filter and name not in args.model_filter:
continue
tasks = ALL_TASKS if args.run_slow_tasks else recommended_tasks
for task in tasks:
if task in unsupported_tasks:
continue
if args.task_filter and task not in args.task_filter:
continue
runtimes = run_model(model_getter, args, task)

runtimes = torch.tensor(runtimes)
mean, var = runtimes.mean(), runtimes.var()
results[name][task] = (mean.item(), var.item())
print("Results for model {} on task {}: {}s (var: {})".format(name, task, mean, var))

if args.output:
with open(args.output, "w") as f:
f.write(to_markdown_table(results))

if __name__ == "__main__":
main()