@@ -255,6 +255,29 @@ test_inductor() {
255255 python test/run_test.py --include inductor/test_torchinductor inductor/test_torchinductor_opinfo --verbose
256256}
257257
# "Global" flags for inductor benchmarking, controlled by TEST_CONFIG.
# For example, a TEST_CONFIG of 'dynamic_aot_eager_torchbench' means the
# benchmark script is run with
# '--backend aot_eager --dynamic-shapes --device cuda'.
# The matrix of test options is specified in .github/workflows/periodic.yml
# and .github/workflows/inductor.yml
DYNAMO_BENCHMARK_FLAGS=()

#######################################
# Rebuild DYNAMO_BENCHMARK_FLAGS from a test configuration name.
# Globals:   DYNAMO_BENCHMARK_FLAGS (written)
# Arguments: $1 - configuration name (substring-matched, may be empty)
# Returns:   0
#######################################
set_dynamo_benchmark_flags() {
  local config="${1:-}"

  DYNAMO_BENCHMARK_FLAGS=()

  # Backend: aot_eager is tested first, so it wins if both substrings occur.
  if [[ "${config}" == *aot_eager* ]]; then
    DYNAMO_BENCHMARK_FLAGS+=(--backend aot_eager)
  elif [[ "${config}" == *inductor* ]]; then
    DYNAMO_BENCHMARK_FLAGS+=(--inductor)
  fi

  if [[ "${config}" == *dynamic* ]]; then
    DYNAMO_BENCHMARK_FLAGS+=(--dynamic-shapes)
  fi

  # Every configuration gets exactly one --device; cuda is the default.
  if [[ "${config}" == *cpu_accuracy* ]]; then
    DYNAMO_BENCHMARK_FLAGS+=(--device cpu)
  else
    DYNAMO_BENCHMARK_FLAGS+=(--device cuda)
  fi
}

# ${TEST_CONFIG:-} keeps this safe when the variable is unset.
set_dynamo_benchmark_flags "${TEST_CONFIG:-}"
280+
258281test_single_dynamo_benchmark () {
259282 # Usage: test_single_dynamo_benchmark inductor_inference huggingface 0 --args-for-script
260283
@@ -277,143 +300,66 @@ test_single_dynamo_benchmark() {
277300 partition_flags=( --total-partitions 2 --partition-id " $shard_id " )
278301 fi
279302
280- # Feel free to remove --device cuda if you ever decide to need to
281- # test CPU as well in CI
282- python " benchmarks/dynamo/$suite .py" \
283- --ci --accuracy --timing --explain \
284- " $@ " " ${partition_flags[@]} " \
285- --output " $TEST_REPORTS_DIR /${name} _${suite} .csv"
286- python benchmarks/dynamo/check_csv.py \
287- -f " $TEST_REPORTS_DIR /${name} _${suite} .csv"
288- }
289-
290- test_aot_eager_benchmark () {
291- # Usage: test_dynamo_benchmark huggingface 0
292-
293- local exit_status=0
294-
295- # Check inference with --float32
296- test_single_dynamo_benchmark " aot_eager_inference" " $@ " --backend aot_eager --device cuda || exit_status=$?
297-
298- # Check training with --amp
299- test_single_dynamo_benchmark " aot_eager_training" " $@ " --backend aot_eager --device cuda --training --amp || exit_status=$?
300-
301- if [[ $exit_status -ne 0 ]]; then
302- echo " Some benchmarks failed; scroll up for details"
303+ if [[ " ${TEST_CONFIG} " == * perf* ]]; then
304+ # MKL_THREADING_LAYER=GNU to mitigate https://github.com/pytorch/pytorch/issues/37377
305+ MKL_THREADING_LAYER=GNU python benchmarks/dynamo/runner.py --suites=" $suite " \
306+ --base-sha=" $BASE_SHA " --output-dir=" $TEST_REPORTS_DIR " " ${partition_flags[@]} " \
307+ --no-graphs --no-update-archive --no-gh-comment
308+ else
309+ python " benchmarks/dynamo/$suite .py" \
310+ --ci --accuracy --timing --explain \
311+ " ${DYNAMO_BENCHMARK_FLAGS[@]} " \
312+ " $@ " " ${partition_flags[@]} " \
313+ --output " $TEST_REPORTS_DIR /${name} _${suite} .csv"
314+ python benchmarks/dynamo/check_csv.py \
315+ -f " $TEST_REPORTS_DIR /${name} _${suite} .csv"
303316 fi
304- return $exit_status
305317}
306318
307- test_inductor_benchmark () {
#######################################
# Run the benchmark passes selected by TEST_CONFIG for one suite.
# Usage: test_dynamo_benchmark huggingface 0
# Globals:   TEST_CONFIG (read)
# Arguments: $1   - suite name, forwarded to test_single_dynamo_benchmark
#            $2   - shard id; callers pass "" when the suite is not sharded
#            $3.. - extra arguments appended to every benchmark invocation
# Returns:   2 if fewer than two arguments are given; otherwise the status
#            of the last test_single_dynamo_benchmark call that ran
#######################################
test_dynamo_benchmark() {
  if (( $# < 2 )); then
    echo "Usage: test_dynamo_benchmark <suite> <shard_id> [args...]" >&2
    return 2
  fi

  local suite="$1"
  local shard_id="$2"
  shift 2

  if [[ "${TEST_CONFIG}" == *perf* ]]; then
    # Performance test training only, for float32 and amp
    # NOTE(review): the visible perf branch of test_single_dynamo_benchmark
    # invokes runner.py without "$@", so --training/--dtypes may not reach
    # it and both passes could run the same command - confirm.
    test_single_dynamo_benchmark "amp" "$suite" "$shard_id" --training --dtypes=amp "$@"
    test_single_dynamo_benchmark "float32" "$suite" "$shard_id" --training --dtypes=float32 "$@"
  else
    # Check inference with --float32
    test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --float32 "$@"

    # Training is skipped for the cpu_accuracy and dynamic configurations.
    if [[ "${TEST_CONFIG}" != *cpu_accuracy* && "${TEST_CONFIG}" != *dynamic* ]]; then
      # Check training with --amp
      test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
    fi
  fi
}
327341
328- test_inductor_benchmark_perf () {
329- # Use test-reports directory under test folder will allow the CI to automatically pick up
330- # the test reports and upload them to S3. Need to use full path here otherwise the script
331- # will bark about file not found later on
#######################################
# Inductor perf smoke test on a handful of torchbench models: an hf_Bert
# training run checked by check_hf_bert_perf_csv.py, then a memory
# compression ratio check for a few more models.
# Globals:   TEST_REPORTS_DIR (written)
# Arguments: none
# Returns:   status of the last command run
#######################################
test_inductor_torchbench_smoketest_perf() {
  local batch_size_file
  local model

  # Using the test-reports directory under the test folder allows the CI to
  # pick up the reports and upload them automatically. A full path is needed,
  # otherwise later steps complain about a missing file.
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"

  # The same list file is passed to every run, so resolve its path once.
  batch_size_file="$(realpath benchmarks/dynamo/torchbench_models_list.txt)"

  python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \
    --batch-size-file "$batch_size_file" --only hf_Bert \
    --output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"
  # the reference speedup value is hardcoded in check_hf_bert_perf_csv.py
  # this value needs to be actively maintained to make this check useful
  python benchmarks/dynamo/check_hf_bert_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"

  # Check memory compression ratio for a few models
  for model in hf_Albert timm_efficientdet timm_vision_transformer; do
    python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \
      --disable-cudagraphs --batch-size-file "$batch_size_file" \
      --only "$model" --output "$TEST_REPORTS_DIR/inductor_training_smoketest_$model.csv"
    cat "$TEST_REPORTS_DIR/inductor_training_smoketest_$model.csv"
    python benchmarks/dynamo/check_memory_compression_ratio.py --actual \
      "$TEST_REPORTS_DIR/inductor_training_smoketest_$model.csv" \
      --expected benchmarks/dynamo/expected_ci_perf_inductor_torchbench.csv
  done
}
418364
419365test_python_gloo_with_tls () {
@@ -842,6 +788,12 @@ test_executorch() {
842788 assert_git_not_dirty
843789}
844790
# TODO: Include this in the Docker image
# Configurations whose name contains "_perf" get matplotlib and tabulate
# installed up front. ${TEST_CONFIG:-} keeps the check safe when the
# variable is unset.
if [[ "${TEST_CONFIG:-}" == *_perf* ]]; then
  install_matplotlib
  install_tabulate
fi
796+
845797if ! [[ " ${BUILD_ENVIRONMENT} " == * libtorch* || " ${BUILD_ENVIRONMENT} " == * -bazel-* || " ${BUILD_ENVIRONMENT} " == * -tsan* ]]; then
846798 (cd test && python -c " import torch; print(torch.__config__.show())" )
847799 (cd test && python -c " import torch; print(torch.__config__.parallel_info())" )
@@ -878,81 +830,24 @@ elif [[ "${TEST_CONFIG}" == *dynamo* && "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHAR
878830elif [[ " ${TEST_CONFIG} " == * dynamo* && " ${SHARD_NUMBER} " == 2 && $NUM_TEST_SHARDS -gt 1 ]]; then
879831 install_torchvision
880832 test_dynamo_shard 2
881- elif [[ " ${TEST_CONFIG} " == * aot_eager_all* ]]; then
882- install_torchtext
883- install_torchvision
884- checkout_install_torchbench
885- install_huggingface
886- install_timm
887- if [[ " ${TEST_CONFIG} " == * dynamic* ]]; then
888- # NB: This code path is currently dead because dynamic shapes takes
889- # too long to run unsharded
890- test_aot_eager_all --dynamic-shapes
891- else
892- test_aot_eager_all
893- fi
894- elif [[ " ${TEST_CONFIG} " == * aot_eager_huggingface* ]]; then
895- install_torchvision
896- install_huggingface
897- if [[ " ${TEST_CONFIG} " == * dynamic* ]]; then
898- test_aot_eager_benchmark huggingface " " --dynamic-shapes
899- else
900- test_aot_eager_benchmark huggingface " "
901- fi
902- elif [[ " ${TEST_CONFIG} " == * aot_eager_timm* && $NUM_TEST_SHARDS -gt 1 ]]; then
903- install_torchvision
904- install_timm
905- id=$(( SHARD_NUMBER- 1 ))
906- if [[ " ${TEST_CONFIG} " == * dynamic* ]]; then
907- test_aot_eager_benchmark timm_models " $id " --dynamic-shapes
908- else
909- test_aot_eager_benchmark timm_models " $id "
910- fi
911- elif [[ " ${TEST_CONFIG} " == * aot_eager_torchbench* ]]; then
912- install_torchtext
913- install_torchvision
914- checkout_install_torchbench
915- if [[ " ${TEST_CONFIG} " == * dynamic* ]]; then
916- PYTHONPATH=$( pwd) /torchbench test_aot_eager_benchmark torchbench " " --dynamic-shapes
917- else
918- PYTHONPATH=$( pwd) /torchbench test_aot_eager_benchmark torchbench " "
919- fi
920- elif [[ " ${TEST_CONFIG} " == * inductor_huggingface* ]]; then
833+ elif [[ " ${TEST_CONFIG} " == * huggingface* ]]; then
921834 install_torchvision
922835 install_huggingface
923- if [[ " ${TEST_CONFIG} " == * inductor_huggingface_perf* ]]; then
924- test_inductor_huggingface_perf
925- elif [[ " ${TEST_CONFIG} " == * inductor_huggingface_cpu_accuracy* ]]; then
926- test_inductor_huggingface cpu
927- else
928- test_inductor_huggingface cuda
929- fi
930- elif [[ " ${TEST_CONFIG} " == * inductor_timm* && $NUM_TEST_SHARDS -gt 1 ]]; then
836+ test_dynamo_benchmark huggingface " "
837+ elif [[ " ${TEST_CONFIG} " == * timm* ]]; then
931838 install_torchvision
932839 install_timm
933840 id=$(( SHARD_NUMBER- 1 ))
934- if [[ " ${TEST_CONFIG} " == * inductor_timm_perf* && $NUM_TEST_SHARDS -gt 1 ]]; then
935- test_inductor_timm_perf_shard $id
936- elif [[ " ${TEST_CONFIG} " == * inductor_timm_cpu_accuracy* && $NUM_TEST_SHARDS -gt 1 ]]; then
937- test_inductor_timm_shard cpu $id
938- else
939- test_inductor_timm_shard cuda $id
940- fi
941- elif [[ " ${TEST_CONFIG} " == * inductor_torchbench* ]]; then
841+ test_dynamo_benchmark timm_models " $id "
842+ elif [[ " ${TEST_CONFIG} " == * torchbench* ]]; then
942843 install_torchtext
943844 install_torchvision
944- if [[ " ${TEST_CONFIG} " == * inductor_torchbench_perf* ]]; then
945- checkout_install_torchbench
946- test_inductor_torchbench_perf
947- elif [[ " ${TEST_CONFIG} " == * inductor_torchbench_cpu_accuracy* ]]; then
948- checkout_install_torchbench
949- test_inductor_torchbench cpu
950- elif [[ " ${TEST_CONFIG} " == * inductor_torchbench_smoketest_perf* ]]; then
845+ if [[ " ${TEST_CONFIG} " == * inductor_torchbench_smoketest_perf* ]]; then
951846 checkout_install_torchbench hf_Bert hf_Albert timm_efficientdet timm_vision_transformer
952- test_inductor_torchbench_smoketest_perf
847+ PYTHONPATH= $( pwd ) /torchbench test_inductor_torchbench_smoketest_perf
953848 else
954849 checkout_install_torchbench
955- test_inductor_torchbench cuda
850+ PYTHONPATH= $( pwd ) /torchbench test_dynamo_benchmark torchbench " "
956851 fi
957852elif [[ " ${TEST_CONFIG} " == * inductor* && " ${SHARD_NUMBER} " == 1 ]]; then
958853 install_torchvision
0 commit comments