Commit e08e93f

albanD authored and facebook-github-bot committed
Reland of benchmark code (#43428)
Summary: Reland of the benchmark code that broke the slow tests because the GPUs were running out of memory.

Pull Request resolved: #43428
Reviewed By: ngimel
Differential Revision: D23296136
Pulled By: albanD
fbshipit-source-id: 0002ae23dc82f401604e33d0905d6b9eedebc851
1 parent 4cfac34 commit e08e93f

File tree

11 files changed: +2078 −0 lines changed
`benchmarks/functional_autograd_benchmark/README.md` — 48 additions & 0 deletions
# Benchmarking tool for the autograd API

This folder contains a set of self-contained scripts that benchmark the autograd API on a number of common models.
It is designed to be run before and after your change, and generates a table that you can share on the PR.

To do so, use `functional_autograd_benchmark.py` to run the benchmarks before your change (writing the output to `before.txt`) and again after your change (writing the output to `after.txt`).
You can then use `compare.py` to get a markdown table comparing the two runs.

The default arguments of `functional_autograd_benchmark.py` should be used in general. You can change them to force a given device (for example `--gpu -1` for CPU) or to run even the (very) slow settings (`--run-slow-tasks`).
### Sample usage

```bash
# Make sure you compile pytorch in release mode and with the same flags before/after
export DEBUG=0
# When running on CPU, it might be required to limit the number of cores to avoid oversubscription
export OMP_NUM_THREADS=10

# Compile pytorch with the base revision
git checkout master
python setup.py develop

# Run the benchmark for the base
# This will use the GPU if available.
pushd benchmarks/functional_autograd_benchmark
python functional_autograd_benchmark.py --output before.txt

# Compile pytorch with your change
popd
git checkout your_feature_branch
python setup.py develop

# Run the benchmark for the new version
pushd benchmarks/functional_autograd_benchmark
python functional_autograd_benchmark.py --output after.txt

# Get the markdown table that you can paste in your github PR
python compare.py

popd
```
### Files in this folder:
- `functional_autograd_benchmark.py` is the main entry point to run the benchmark.
- `compare.py` is the entry point to run the comparison script that generates a markdown table.
- `torchaudio_models.py` and `torchvision_models.py` contain code extracted from torchaudio and torchvision so that the models can run without a specific version of these libraries installed.
- `ppl_models.py`, `vision_models.py` and `audio_text_models.py` contain all the getter functions used for the benchmark.
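
As a rough illustration of how these pieces fit together, here is a minimal sketch of what the benchmark does for a single (model, task) pair. It assumes it is run from inside this folder so the local imports resolve; `get_resnet18` and the `vjp` task are taken from the scripts below.

```python
# Minimal sketch of one benchmark step: get a (forward, params) pair from a
# getter, then run one autograd task on it via torch.autograd.functional.
import torch
from torch.autograd import functional

import vision_models

device = torch.device("cpu")
forward, params = vision_models.get_resnet18(device)

# "vjp" task: contract the Jacobian of `forward` with a random cotangent.
out = forward(*params)
v = torch.rand_like(out)
_, grads = functional.vjp(forward, params, v=v, strict=True)
```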
`benchmarks/functional_autograd_benchmark/audio_text_models.py` — 122 additions & 0 deletions
```python
import torch
from torch import nn, Tensor

import torchaudio_models as models

from utils import extract_weights, load_weights, GetterReturnType

def get_wav2letter(device: torch.device) -> GetterReturnType:
    N = 10
    input_frames = 700
    vocab_size = 28
    model = models.Wav2Letter(num_classes=vocab_size)
    criterion = torch.nn.NLLLoss()
    model.to(device)
    params, names = extract_weights(model)

    inputs = torch.rand([N, 1, input_frames], device=device)
    labels = torch.rand(N, 3, device=device).mul(vocab_size).long()

    def forward(*new_params: Tensor) -> Tensor:
        load_weights(model, names, new_params)
        out = model(inputs)

        loss = criterion(out, labels)
        return loss

    return forward, params

def get_deepspeech(device: torch.device) -> GetterReturnType:
    sample_rate = 16000
    window_size = 0.02
    window = "hamming"
    audio_conf = dict(sample_rate=sample_rate,
                      window_size=window_size,
                      window=window,
                      noise_dir=None)

    N = 10
    num_classes = 10
    spectrogram_size = 161
    # The commented values are the original sizes in the code
    seq_length = 500  # 1343
    target_length = 10  # 50
    labels = torch.rand(num_classes, device=device)
    inputs = torch.rand(N, 1, spectrogram_size, seq_length, device=device)
    # Sequence length for each input
    inputs_sizes = torch.rand(N, device=device).mul(seq_length * 0.1).add(seq_length * 0.8)
    targets = torch.rand(N, target_length, device=device)
    targets_sizes = torch.full((N,), target_length, dtype=torch.int, device=device)

    model = models.DeepSpeech(rnn_type=nn.LSTM, labels=labels, rnn_hidden_size=1024, nb_layers=5,
                              audio_conf=audio_conf, bidirectional=True)
    model = model.to(device)
    criterion = nn.CTCLoss()
    params, names = extract_weights(model)

    def forward(*new_params: Tensor) -> Tensor:
        load_weights(model, names, new_params)
        out, out_sizes = model(inputs, inputs_sizes)
        out = out.transpose(0, 1)  # For ctc loss

        loss = criterion(out, targets, out_sizes, targets_sizes)
        return loss

    return forward, params

def get_transformer(device: torch.device) -> GetterReturnType:
    # For most SOTA research, you would want embed of 720, nhead of 12, bsz of 64 and tgt_len/src_len of 128.
    N = 64
    seq_length = 128
    ntoken = 50
    model = models.TransformerModel(ntoken=ntoken, ninp=720, nhead=12, nhid=2048, nlayers=2)
    model.to(device)
    criterion = nn.NLLLoss()
    params, names = extract_weights(model)

    data = torch.rand(N, seq_length + 1, device=device).mul(ntoken).long()
    inputs = data.narrow(1, 0, seq_length)
    targets = data.narrow(1, 1, seq_length)

    def forward(*new_params: Tensor) -> Tensor:
        load_weights(model, names, new_params)
        out = model(inputs)

        loss = criterion(out.reshape(N * seq_length, ntoken), targets.reshape(N * seq_length))
        return loss

    return forward, params

def get_multiheadattn(device: torch.device) -> GetterReturnType:
    # From https://github.com/pytorch/text/blob/master/test/data/test_modules.py#L10
    embed_dim, nhead, tgt_len, src_len, bsz = 10, 5, 6, 10, 64
    # Build torchtext MultiheadAttention module
    in_proj = models.InProjContainer(torch.nn.Linear(embed_dim, embed_dim, bias=False),
                                     torch.nn.Linear(embed_dim, embed_dim, bias=False),
                                     torch.nn.Linear(embed_dim, embed_dim, bias=False))

    model = models.MultiheadAttentionContainer(nhead, in_proj,
                                               models.ScaledDotProduct(),
                                               torch.nn.Linear(embed_dim, embed_dim, bias=False))
    model.to(device)
    params, names = extract_weights(model)

    query = torch.rand((tgt_len, bsz, embed_dim), device=device)
    key = value = torch.rand((src_len, bsz, embed_dim), device=device)
    attn_mask_2D = torch.randint(0, 2, (tgt_len, src_len), device=device).to(torch.bool)
    bias_k = bias_v = torch.rand((1, 1, embed_dim), device=device)

    attn_mask = torch.stack([attn_mask_2D] * (bsz * nhead))
    bias_k = bias_k.repeat(1, bsz, 1).reshape(1, bsz * nhead, -1)
    bias_v = bias_v.repeat(1, bsz, 1).reshape(1, bsz * nhead, -1)

    def forward(*new_params: Tensor) -> Tensor:
        load_weights(model, names, new_params)
        mha_output, attn_weights = model(query, key, value, attn_mask=attn_mask, bias_k=bias_k, bias_v=bias_v)

        # Don't test any specific loss, just backprop ones for both outputs
        loss = mha_output.sum() + attn_weights.sum()

        return loss

    return forward, params
```
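
All of the getters above rely on `extract_weights` and `load_weights` from `utils.py`, which is not part of the excerpt shown here. As a rough, hypothetical sketch of the pattern (the real helpers may differ in detail): the weights are detached into plain tensors so they can be passed as explicit function inputs, then written back as regular attributes so the forward pass stays differentiable with respect to them.

```python
# Hypothetical sketch of the extract_weights / load_weights pattern; this is
# an assumption about utils.py, which is not shown in this diff.
import torch
from torch import nn, Tensor
from typing import List, Tuple

def extract_weights(model: nn.Module) -> Tuple[Tuple[Tensor, ...], List[str]]:
    # Detach every parameter into a plain leaf tensor that still requires grad,
    # and record its dotted name so it can be written back later.
    names = [name for name, _ in model.named_parameters()]
    params = tuple(p.detach().requires_grad_() for p in model.parameters())
    return params, names

def load_weights(model: nn.Module, names: List[str], params: Tuple[Tensor, ...]) -> None:
    # Write the tensors back into the module hierarchy as plain attributes
    # (not nn.Parameters), so the next forward pass is a differentiable
    # function of `params`.
    for name, p in zip(names, params):
        *path, attr = name.split(".")
        mod = model
        for part in path:
            mod = getattr(mod, part)
        delattr(mod, attr)
        setattr(mod, attr, p)
```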
`benchmarks/functional_autograd_benchmark/compare.py` — 45 additions & 0 deletions
```python
import argparse
from collections import defaultdict

from utils import to_markdown_table, from_markdown_table

def main():
    parser = argparse.ArgumentParser("Main script to compare results from the benchmarks")
    parser.add_argument("--before", type=str, default="before.txt", help="Text file containing the times to use as base")
    parser.add_argument("--after", type=str, default="after.txt", help="Text file containing the times to use as new version")
    parser.add_argument("--output", type=str, default="", help="Text file where to write the output")
    args = parser.parse_args()

    with open(args.before, "r") as f:
        content = f.read()
    res_before = from_markdown_table(content)

    with open(args.after, "r") as f:
        content = f.read()
    res_after = from_markdown_table(content)

    diff = defaultdict(defaultdict)
    for model in res_before:
        for task in res_before[model]:
            mean_before, var_before = res_before[model][task]
            if task not in res_after[model]:
                diff[model][task] = (None, mean_before, var_before, None, None)
            else:
                mean_after, var_after = res_after[model][task]
                diff[model][task] = (mean_before / mean_after, mean_before, var_before, mean_after, var_after)
    for model in res_after:
        for task in res_after[model]:
            if task not in res_before[model]:
                mean_after, var_after = res_after[model][task]
                diff[model][task] = (None, None, None, mean_after, var_after)

    header = ("model", "task", "speedup", "mean (before)", "var (before)", "mean (after)", "var (after)")
    out = to_markdown_table(diff, header=header)

    print(out)
    if args.output:
        with open(args.output, "w") as f:
            f.write(out)

if __name__ == "__main__":
    main()
```
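
Note that the speedup column produced above is simply `mean_before / mean_after`, so values above 1 mean the new version is faster. For example:

```python
mean_before, mean_after = 0.50, 0.40  # seconds; illustrative numbers only
speedup = mean_before / mean_after    # 1.25, i.e. the task got ~25% faster
```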
`benchmarks/functional_autograd_benchmark/functional_autograd_benchmark.py` — 153 additions & 0 deletions
```python
import torch
from torch.autograd import functional

import time
from argparse import ArgumentParser
from collections import defaultdict
from typing import NamedTuple, Callable, List, Any

import ppl_models
import vision_models
import audio_text_models

from utils import to_markdown_table, TimingResultType, InputsType, GetterType, VType

# Listing of the different tasks
FAST_TASKS_NO_DOUBLE_BACK = [
    "vjp",
]

FAST_TASKS = FAST_TASKS_NO_DOUBLE_BACK + [
    "vhp",
    "jvp",
]

ALL_TASKS = FAST_TASKS + [
    "hvp",
    "jacobian",
    "hessian"
]

DOUBLE_BACKWARD_TASKS = ["jvp", "hvp", "vhp", "hessian"]

# Model definition which contains:
# - name: a string with the model name.
# - getter: a function to get the model. It takes as input the device on which the model
#   will run. It should return the forward function and the parameters (Tensors) used as
#   input for the forward function. Note that the forward must *not* have any side effect.
# - tasks: the list of recommended tasks that can run in a reasonable amount of time with this model.
# - unsupported: the list of tasks that this model cannot run.
class ModelDef(NamedTuple):
    name: str
    getter: GetterType
    tasks: List[str]
    unsupported: List[str]

MODELS = [
    ModelDef("resnet18", vision_models.get_resnet18, FAST_TASKS, []),
    ModelDef("fcn_resnet", vision_models.get_fcn_resnet, FAST_TASKS, []),
    ModelDef("detr", vision_models.get_detr, FAST_TASKS, []),
    ModelDef("ppl_simple_reg", ppl_models.get_simple_regression, ALL_TASKS, []),
    ModelDef("ppl_robust_reg", ppl_models.get_robust_regression, ALL_TASKS, []),
    ModelDef("wav2letter", audio_text_models.get_wav2letter, FAST_TASKS, []),
    ModelDef("deepspeech", audio_text_models.get_deepspeech, FAST_TASKS_NO_DOUBLE_BACK, DOUBLE_BACKWARD_TASKS),
    ModelDef("transformer", audio_text_models.get_transformer, FAST_TASKS, []),
    ModelDef("multiheadattn", audio_text_models.get_multiheadattn, FAST_TASKS, []),
]
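
# To benchmark an additional model, it should be enough to append a ModelDef
# entry above. Hypothetical example (`my_models.get_my_model` is not part of
# this diff, it only illustrates the contract):
#
#   ModelDef("my_model", my_models.get_my_model, FAST_TASKS, []),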

def get_v_for(model: Callable, inp: InputsType, task: str) -> VType:
    v: VType

    if task in ["vjp"]:
        out = model(*inp)
        v = torch.rand_like(out)
    elif task in ["jvp", "hvp", "vhp"]:
        if isinstance(inp, tuple):
            v = tuple(torch.rand_like(i) for i in inp)
        else:
            v = torch.rand_like(inp)
    else:
        v = None

    return v

def run_once(model: Callable, inp: InputsType, task: str, v: VType) -> None:
    func = getattr(functional, task)

    if v is not None:
        res = func(model, inp, v=v, strict=True)
    else:
        res = func(model, inp, strict=True)

def run_model(model_getter: GetterType, args: Any, task: str) -> List[float]:
    if args.gpu == -1:
        device = torch.device("cpu")

        def noop():
            pass
        do_sync = noop
    else:
        device = torch.device("cuda:{}".format(args.gpu))
        do_sync = torch.cuda.synchronize

    model, inp = model_getter(device)

    v = get_v_for(model, inp, task)
    # Warmup
    run_once(model, inp, task, v)

    elapsed = []
    for it in range(args.num_iters):
        do_sync()
        start = time.time()
        run_once(model, inp, task, v)
        do_sync()
        elapsed.append(time.time() - start)

    return elapsed

def main():
    parser = ArgumentParser("Main script to benchmark functional API of the autograd.")
    parser.add_argument("--output", type=str, default="", help="Text file where to write the output")
    parser.add_argument("--num-iters", type=int, default=10)
    parser.add_argument("--gpu", type=int, default=-2, help="GPU to use, -1 for CPU and -2 for auto-detect")
    parser.add_argument("--run-slow-tasks", action="store_true", help="Run even the slow tasks")
    parser.add_argument("--model-filter", type=str, default="", help="Only run the models in this filter")
    parser.add_argument("--task-filter", type=str, default="", help="Only run the tasks in this filter")
    parser.add_argument("--num-threads", type=int, default=10,
                        help="Number of concurrent threads to use when running on cpu")
    parser.add_argument("--seed", type=int, default=0, help="The random seed to use.")
    args = parser.parse_args()

    results: TimingResultType = defaultdict(defaultdict)
    torch.set_num_threads(args.num_threads)
    torch.set_num_interop_threads(args.num_threads)

    # This automatically seeds cuda if it is available
    torch.manual_seed(args.seed)

    if args.gpu == -2:
        args.gpu = 0 if torch.cuda.is_available() else -1

    for name, model_getter, recommended_tasks, unsupported_tasks in MODELS:
        if args.model_filter and name not in args.model_filter:
            continue
        tasks = ALL_TASKS if args.run_slow_tasks else recommended_tasks
        for task in tasks:
            if task in unsupported_tasks:
                continue
            if args.task_filter and task not in args.task_filter:
                continue
            runtimes = run_model(model_getter, args, task)

            runtimes = torch.tensor(runtimes)
            mean, var = runtimes.mean(), runtimes.var()
            results[name][task] = (mean.item(), var.item())
            print("Results for model {} on task {}: {}s (var: {})".format(name, task, mean, var))

    if args.output:
        with open(args.output, "w") as f:
            f.write(to_markdown_table(results))

if __name__ == "__main__":
    main()
```
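
The `do_sync` calls around the timed region in `run_model` matter on GPU: CUDA kernels launch asynchronously, so without `torch.cuda.synchronize()` the wall-clock measurement would mostly capture kernel launch overhead rather than the actual work. A minimal sketch of the same pattern in isolation, assuming a CUDA device is in use:

```python
import time
import torch

def time_once(fn) -> float:
    # Wait for any pending GPU work before starting the clock, and again
    # before stopping it, so the measurement covers the kernels fn launched.
    torch.cuda.synchronize()
    start = time.time()
    fn()
    torch.cuda.synchronize()
    return time.time() - start
```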
