Skip to content

Commit 054a254

Browse files
ezyang authored and pytorchmergebot committed
Run minifier tests same process when possible (#100416)
The fast minifier tests now take only 10s to run.

Signed-off-by: Edward Z. Yang <ezyang@meta.com>
Pull Request resolved: #100416
Approved by: https://github.com/voznesenskym
1 parent f093ee1 commit 054a254

File tree

4 files changed: +175 -73 lines changed

test/dynamo/test_minifier.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def inner(x):
3131
)
3232

3333
test_proc, _, repro_proc = self._run_full_test_nocode(
34-
run_code, "dynamo", repro_level, ""
34+
run_code, "dynamo", repro_level, "", isolate=False
3535
)
3636

3737
self.assertIn(error_name, test_proc.stderr.decode("utf-8"))
@@ -148,7 +148,7 @@ def inner(x1, y1):
148148
)
149149

150150
(test_proc, _, repro_proc), (launch_code, _) = self._run_full_test(
151-
run_code, "dynamo", 2, ""
151+
run_code, "dynamo", 2, "", isolate=False
152152
)
153153

154154
tb1 = test_proc.stderr.decode("utf-8")
@@ -208,7 +208,7 @@ def inner(x):
208208
)
209209

210210
(test_proc, _, repro_proc), (launch_code, repro_code) = self._run_full_test(
211-
run_code, "dynamo", 2, ""
211+
run_code, "dynamo", 2, "", isolate=False
212212
)
213213

214214
tb1 = test_proc.stderr.decode("utf-8")

test/inductor/test_minifier.py

Lines changed: 8 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -14,35 +14,29 @@
1414

1515

1616
class MinifierTests(MinifierTestBase):
17-
# Generates code that patches CppOverrides/TritonOverrides.
18-
def _gen_codegen_fn_patch_code(self, device, bug_type):
19-
assert bug_type in ("compile_error", "runtime_error", "accuracy")
20-
return f"""\
21-
{torch._dynamo.config.codegen_config()}
22-
{torch._inductor.config.codegen_config()}
23-
torch._inductor.config.{"cpp" if device == "cpu" else "triton"}.inject_relu_bug_TESTING_ONLY = {bug_type!r}
24-
"""
25-
2617
# Test that compile and accuracy errors after aot can be repro'd (both CPU and CUDA)
2718
def _test_after_aot(self, device, bug_type, repro_level):
19+
# NB: The program is intentionally quite simple, just enough to
20+
# trigger one minification step, no more (dedicated minifier tests
21+
# should exercise minifier only)
2822
run_code = textwrap.dedent(
2923
f"""\
3024
@torch.compile()
3125
def inner(x):
32-
for _ in range(3):
33-
x = torch.sin(x)
3426
x = torch.relu(x)
35-
for _ in range(3):
36-
x = torch.cos(x)
27+
x = torch.cos(x)
3728
return x
3829
3930
inner(torch.randn(20, 20).to("{device}"))
4031
"""
4132
)
33+
# These will crash the process and should be tested in
34+
# test_minifier_isolate.py
35+
assert bug_type != "runtime_error"
4236
patch_code = self._gen_codegen_fn_patch_code(device, bug_type)
4337
self.assertIsNotNone(patch_code)
4438
test_proc, _, repro_proc = self._run_full_test_nocode(
45-
run_code, "aot", repro_level, patch_code
39+
run_code, "aot", repro_level, patch_code, isolate=False
4640
)
4741
return test_proc.stderr.decode("utf-8"), repro_proc.stderr.decode("utf-8")
4842

@@ -70,42 +64,6 @@ def test_after_aot_cuda_accuracy_error(self):
7064
self.assertIn("AccuracyError", tb1)
7165
self.assertIn("AccuracyError", tb2)
7266

73-
# Test that runtime errors after aot can be repro'd (CPU only for now)
74-
def _test_after_aot_runtime_error(self, device, bug_type):
75-
run_code = textwrap.dedent(
76-
f"""\
77-
@torch.compile()
78-
def inner(x):
79-
for _ in range(3):
80-
x = torch.sin(x)
81-
x = torch.relu(x)
82-
for _ in range(3):
83-
x = torch.cos(x)
84-
return x
85-
86-
inner(torch.randn(20, 20).to("{device}"))
87-
"""
88-
)
89-
patch_code = self._gen_codegen_fn_patch_code(device, bug_type)
90-
self.assertIsNotNone(patch_code)
91-
92-
test_proc, _, repro_proc = self._run_full_test_nocode(
93-
run_code, "aot", 3, patch_code
94-
)
95-
96-
self.assertNotIn("CompilerError", test_proc.stderr.decode("utf-8"))
97-
98-
self.assertEqual(test_proc.returncode, repro_proc.returncode)
99-
self.assertNotEqual(test_proc.returncode, 0)
100-
101-
@unittest.skipIf(IS_JETSON, "Fails on Jetson")
102-
def test_after_aot_cpu_runtime_error(self):
103-
self._test_after_aot_runtime_error("cpu", "runtime_error")
104-
105-
@requires_cuda()
106-
def test_after_aot_cuda_runtime_error(self):
107-
self._test_after_aot_runtime_error("cuda", "runtime_error")
108-
10967

11068
if __name__ == "__main__":
11169
import sys
test/inductor/test_minifier_isolate.py

Lines changed: 62 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,62 @@
1+
# Owner(s): ["module: inductor"]
2+
import functools
3+
import textwrap
4+
import unittest
5+
6+
import torch
7+
import torch._dynamo
8+
import torch._inductor.utils
9+
from torch._dynamo.test_minifier_common import MinifierTestBase
10+
from torch.testing._internal.common_utils import IS_JETSON, IS_MACOS, TEST_WITH_ASAN
11+
12+
_HAS_TRITON = torch._inductor.utils.has_triton()
13+
requires_cuda = functools.partial(unittest.skipIf, not _HAS_TRITON, "requires cuda")
14+
15+
16+
# These minifier tests are slow, because they must be run in separate
17+
# subprocesses
18+
class MinifierIsolateTests(MinifierTestBase):
19+
def _test_after_aot_runtime_error(self, device, bug_type):
20+
run_code = textwrap.dedent(
21+
f"""\
22+
@torch.compile()
23+
def inner(x):
24+
x = torch.relu(x)
25+
x = torch.cos(x)
26+
return x
27+
28+
inner(torch.randn(20, 20).to("{device}"))
29+
"""
30+
)
31+
patch_code = self._gen_codegen_fn_patch_code(device, bug_type)
32+
self.assertIsNotNone(patch_code)
33+
34+
# These must isolate because they crash the process
35+
test_proc, _, repro_proc = self._run_full_test_nocode(
36+
run_code, "aot", 3, patch_code, isolate=True
37+
)
38+
39+
self.assertNotIn("CompilerError", test_proc.stderr.decode("utf-8"))
40+
41+
self.assertEqual(test_proc.returncode, repro_proc.returncode)
42+
self.assertNotEqual(test_proc.returncode, 0)
43+
44+
@unittest.skipIf(IS_JETSON, "Fails on Jetson")
45+
def test_after_aot_cpu_runtime_error(self):
46+
self._test_after_aot_runtime_error("cpu", "runtime_error")
47+
48+
@requires_cuda()
49+
def test_after_aot_cuda_runtime_error(self):
50+
self._test_after_aot_runtime_error("cuda", "runtime_error")
51+
52+
53+
if __name__ == "__main__":
54+
import sys
55+
56+
from torch._dynamo.test_case import run_tests
57+
58+
# Skip CI tests on mac since CPU inductor does not seem to work due to C++ compile errors,
59+
# also skip on ASAN due to https://github.com/pytorch/pytorch/issues/98262
60+
# also skip on Py 3.11+ since unhandled exceptions can cause segfaults
61+
if not IS_MACOS and not TEST_WITH_ASAN and sys.version_info < (3, 11):
62+
run_tests()

torch/_dynamo/test_minifier_common.py

Lines changed: 102 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,12 @@
1+
import io
2+
import logging
13
import os
24
import re
35
import shutil
46
import subprocess
57
import tempfile
8+
import traceback
9+
from unittest.mock import patch
610

711
import torch
812
import torch._dynamo
@@ -41,15 +45,80 @@ def tearDownClass(cls):
4145
print(f"test_minifier_common tmpdir kept at: {cls.DEBUG_DIR}")
4246
cls._exit_stack.close()
4347

48+
def _gen_codegen_fn_patch_code(self, device, bug_type):
49+
assert bug_type in ("compile_error", "runtime_error", "accuracy")
50+
return f"""\
51+
{torch._dynamo.config.codegen_config()}
52+
{torch._inductor.config.codegen_config()}
53+
torch._inductor.config.{"cpp" if device == "cpu" else "triton"}.inject_relu_bug_TESTING_ONLY = {bug_type!r}
54+
"""
55+
56+
def _maybe_subprocess_run(self, args, *, isolate, cwd=None):
57+
if not isolate:
58+
assert len(args) >= 2, args
59+
assert args[0] == "python3", args
60+
if args[1] == "-c":
61+
assert len(args) == 3, args
62+
code = args[2]
63+
args = ["-c"]
64+
else:
65+
assert len(args) >= 2, args
66+
with open(args[1], "r") as f:
67+
code = f.read()
68+
args = args[1:]
69+
70+
# WARNING: This is not a perfect simulation of running
71+
# the program out of tree. We only interpose on things we KNOW we
72+
# need to handle for tests. If you need more stuff, you will
73+
# need to augment this appropriately.
74+
75+
# NB: Can't use save_config because that will omit some fields,
76+
# but we must save and reset ALL fields
77+
dynamo_config = torch._dynamo.config._config.copy()
78+
inductor_config = torch._inductor.config._config.copy()
79+
try:
80+
stderr = io.StringIO()
81+
log_handler = logging.StreamHandler(stderr)
82+
log = logging.getLogger("torch._dynamo")
83+
log.addHandler(log_handler)
84+
try:
85+
prev_cwd = os.getcwd()
86+
if cwd is not None:
87+
os.chdir(cwd)
88+
with patch("sys.argv", args):
89+
exec(code, {"__name__": "__main__"})
90+
rc = 0
91+
except Exception:
92+
rc = 1
93+
traceback.print_exc(file=stderr)
94+
finally:
95+
log.removeHandler(log_handler)
96+
if cwd is not None:
97+
os.chdir(prev_cwd)
98+
finally:
99+
object.__setattr__(torch._dynamo.config, "_config", dynamo_config)
100+
object.__setattr__(torch._inductor.config, "_config", inductor_config)
101+
102+
# TODO: return a more appropriate data structure here
103+
return subprocess.CompletedProcess(
104+
args,
105+
rc,
106+
b"",
107+
stderr.getvalue().encode("utf-8"),
108+
)
109+
else:
110+
return subprocess.run(args, capture_output=True, cwd=cwd)
111+
44112
# Run `code` in a separate python process.
45113
# Returns the completed process state and the directory containing the
46114
# minifier launcher script, if `code` outputted it.
47-
def _run_test_code(self, code):
48-
proc = subprocess.run(
49-
["python3", "-c", code], capture_output=True, cwd=self.DEBUG_DIR
115+
def _run_test_code(self, code, *, isolate):
116+
proc = self._maybe_subprocess_run(
117+
["python3", "-c", code], isolate=isolate, cwd=self.DEBUG_DIR
50118
)
51-
print("stdout:", proc.stdout.decode("utf-8"))
52-
print("stderr:", proc.stderr.decode("utf-8"))
119+
120+
print("test stdout:", proc.stdout.decode("utf-8"))
121+
print("test stderr:", proc.stderr.decode("utf-8"))
53122
repro_dir_match = re.search(
54123
r"(\S+)minifier_launcher.py", proc.stderr.decode("utf-8")
55124
)
@@ -58,34 +127,35 @@ def _run_test_code(self, code):
58127
return proc, None
59128

60129
# Runs the minifier launcher script in `repro_dir`
61-
def _run_minifier_launcher(self, repro_dir):
130+
def _run_minifier_launcher(self, repro_dir, isolate):
62131
self.assertIsNotNone(repro_dir)
63132
launch_file = os.path.join(repro_dir, "minifier_launcher.py")
64133
with open(launch_file, "r") as f:
65134
launch_code = f.read()
66135
self.assertTrue(os.path.exists(launch_file))
67136

68-
launch_proc = subprocess.run(
69-
["python3", launch_file],
70-
capture_output=True,
71-
cwd=repro_dir,
72-
)
137+
args = ["python3", launch_file, "minify"]
138+
if not isolate:
139+
args.append("--no-isolate")
140+
launch_proc = self._maybe_subprocess_run(args, isolate=isolate, cwd=repro_dir)
73141
print("minifier stdout:", launch_proc.stdout.decode("utf-8"))
74142
print("minifier stderr:", launch_proc.stderr.decode("utf-8"))
75143

76144
return launch_proc, launch_code
77145

78146
# Runs the repro script in `repro_dir`
79-
def _run_repro(self, repro_dir):
147+
def _run_repro(self, repro_dir, *, isolate=True):
80148
self.assertIsNotNone(repro_dir)
81149
repro_file = os.path.join(repro_dir, "repro.py")
82150
with open(repro_file, "r") as f:
83151
repro_code = f.read()
84152
self.assertTrue(os.path.exists(repro_file))
85153

86-
repro_proc = subprocess.run(
87-
["python3", repro_file], capture_output=True, cwd=repro_dir
154+
repro_proc = self._maybe_subprocess_run(
155+
["python3", repro_file], isolate=isolate, cwd=repro_dir
88156
)
157+
print("repro stdout:", repro_proc.stdout.decode("utf-8"))
158+
print("repro stderr:", repro_proc.stderr.decode("utf-8"))
89159
return repro_proc, repro_code
90160

91161
# Template for testing code.
@@ -108,16 +178,28 @@ def _gen_test_code(self, run_code, repro_after, repro_level, patch_code):
108178
# 1. Run the problematic code (in a separate process since it could segfault)
109179
# 2. Run the generated minifier launcher script
110180
# 3. Run the generated repro script
111-
def _run_full_test(self, run_code, repro_after, repro_level, patch_code):
181+
#
182+
# If possible, you should run the test with isolate=False; use
183+
# isolate=True only if the bug you're testing would otherwise
184+
# crash the process
185+
def _run_full_test(
186+
self, run_code, repro_after, repro_level, patch_code, *, isolate
187+
):
112188
test_code = self._gen_test_code(run_code, repro_after, repro_level, patch_code)
113-
test_proc, repro_dir = self._run_test_code(test_code)
189+
test_proc, repro_dir = self._run_test_code(test_code, isolate=isolate)
114190
self.assertIsNotNone(repro_dir)
115191
print("running minifier")
116-
launch_proc, launch_code = self._run_minifier_launcher(repro_dir)
192+
launch_proc, launch_code = self._run_minifier_launcher(
193+
repro_dir, isolate=isolate
194+
)
117195
print("running repro")
118-
repro_proc, repro_code = self._run_repro(repro_dir)
196+
repro_proc, repro_code = self._run_repro(repro_dir, isolate=isolate)
119197
return (test_proc, launch_proc, repro_proc), (launch_code, repro_code)
120198

121-
def _run_full_test_nocode(self, run_code, repro_after, repro_level, patch_code):
122-
tbs, _ = self._run_full_test(run_code, repro_after, repro_level, patch_code)
199+
def _run_full_test_nocode(
200+
self, run_code, repro_after, repro_level, patch_code, *, isolate
201+
):
202+
tbs, _ = self._run_full_test(
203+
run_code, repro_after, repro_level, patch_code, isolate=isolate
204+
)
123205
return tbs

0 commit comments

Comments
 (0)