@@ -236,6 +236,8 @@ test_dynamo_shard() {
236236 test_fx \
237237 test_package \
238238 test_legacy_vmap \
239+ functorch/test_dims \
240+ functorch/test_aotdispatch \
239241 --shard " $1 " " $NUM_TEST_SHARDS " \
240242 --verbose
241243 assert_git_not_dirty
@@ -264,7 +266,7 @@ DYNAMO_BENCHMARK_FLAGS=()
264266
265267if [[ " ${TEST_CONFIG} " == * aot_eager* ]]; then
266268 DYNAMO_BENCHMARK_FLAGS+=(--backend aot_eager)
267- elif [[ " ${TEST_CONFIG} " == * inductor* ]]; then
269+ elif [[ " ${TEST_CONFIG} " == * inductor* && " ${TEST_CONFIG} " != * perf * ]]; then
268270 DYNAMO_BENCHMARK_FLAGS+=(--inductor)
269271fi
270272
@@ -278,6 +280,46 @@ else
278280 DYNAMO_BENCHMARK_FLAGS+=(--device cuda)
279281fi
280282
test_perf_for_dashboard() {
  # Run the inductor performance-dashboard jobs for one benchmark suite:
  # per-dtype accuracy runs (eager, aot_eager, inductor with and without
  # cudagraphs) plus inductor performance runs, each writing one CSV into
  # test/test-reports.
  #
  # Arguments:
  #   $1 - benchmark suite name; selects benchmarks/dynamo/<suite>.py
  #        (callers in this file pass huggingface / torchbench — TODO confirm
  #        the full set against test_dynamo_benchmark)
  #   $@ - remaining arguments are forwarded verbatim to the benchmark script
  TEST_REPORTS_DIR=$(pwd)/test/test-reports
  mkdir -p "$TEST_REPORTS_DIR"

  local suite="$1"
  shift

  for dtype in amp float32; do
    # Run accuracy test
    # All the accuracy tests can be skipped once the CI accuracy checking is stable enough
    for backend in eager aot_eager; do
      python "benchmarks/dynamo/$suite.py" \
          --accuracy --"$dtype" --backend "$backend" "$@" \
          --output "$TEST_REPORTS_DIR/${backend}_${suite}_${dtype}_training_cuda_accuracy.csv"
    done

    # Run accuracy test for inductor with different configs
    # --disable-cudagraphs is the default inductor behavior
    # TODO: update here once cudagraphs is turned on as default
    backend=inductor
    python "benchmarks/dynamo/$suite.py" \
        --accuracy --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
        --output "$TEST_REPORTS_DIR/${backend}_no_cudagraphs_${suite}_${dtype}_training_cuda_accuracy.csv"
    python "benchmarks/dynamo/$suite.py" \
        --accuracy --"$dtype" --backend "$backend" "$@" \
        --output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_training_cuda_accuracy.csv"

    # Run performance test
    # Skip dynamo-eager and aot-eager for performance test
    # Run performance test for inductor with different configs
    # TODO: add more configs here, e.g. dynamic-shapes, max-autotune, etc.
    python "benchmarks/dynamo/$suite.py" \
        --performance --cold-start-latency --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
        --output "$TEST_REPORTS_DIR/${backend}_no_cudagraphs_${suite}_${dtype}_training_cuda_performance.csv"
    python "benchmarks/dynamo/$suite.py" \
        --performance --cold-start-latency --"$dtype" --backend "$backend" "$@" \
        --output "$TEST_REPORTS_DIR/${backend}_with_cudagraphs_${suite}_${dtype}_training_cuda_performance.csv"
  done
}
322+
281323test_single_dynamo_benchmark () {
282324 # Usage: test_single_dynamo_benchmark inductor_inference huggingface 0 --args-for-script
283325
@@ -302,15 +344,12 @@ test_single_dynamo_benchmark() {
302344
303345 if [[ " ${TEST_CONFIG} " == * perf_compare* ]]; then
304346 python " benchmarks/dynamo/$suite .py" \
305- --ci --performance --disable-cudagraphs \
306- " ${DYNAMO_BENCHMARK_FLAGS[@]} " \
307- " $@ " " ${partition_flags[@]} " \
347+ --ci --performance --disable-cudagraphs --inductor \
348+ " ${DYNAMO_BENCHMARK_FLAGS[@]} " " $@ " " ${partition_flags[@]} " \
308349 --output " $TEST_REPORTS_DIR /${name} _${suite} .csv"
309350 elif [[ " ${TEST_CONFIG} " == * perf* ]]; then
310- # MKL_THREADING_LAYER=GNU to mitigate https://github.com/pytorch/pytorch/issues/37377
311- MKL_THREADING_LAYER=GNU python benchmarks/dynamo/runner.py --suites=" $suite " \
312- --base-sha=" $BASE_SHA " " ${partition_flags[@]} " \
313- --no-graphs --no-update-archive --no-gh-comment " $@ "
351+ test_perf_for_dashboard " $suite " \
352+ " ${DYNAMO_BENCHMARK_FLAGS[@]} " " $@ " " ${partition_flags[@]} "
314353 else
315354 python " benchmarks/dynamo/$suite .py" \
316355 --ci --accuracy --timing --explain \
@@ -322,6 +361,7 @@ test_single_dynamo_benchmark() {
322361 if [[ " ${TEST_CONFIG} " == * inductor* ]] && [[ " ${TEST_CONFIG} " != * cpu_accuracy* ]] && [[ " ${TEST_CONFIG} " != * dynamic* ]]; then
323362 # because I haven't dealt with dynamic expected artifacts yet,
324363 # and non-inductor jobs (e.g. periodic, cpu-accuracy) may have different set of expected models.
364+ # TODO: make update_expected.py produces combined expected csv file
325365 python benchmarks/dynamo/check_graph_breaks.py \
326366 --actual " $TEST_REPORTS_DIR /${name} _$suite .csv" \
327367 --expected " benchmarks/dynamo/ci_expected_accuracy/${name} _${suite}${shard_id} .csv"
@@ -339,11 +379,10 @@ test_dynamo_benchmark() {
339379 shift
340380
341381 if [[ " ${TEST_CONFIG} " == * perf_compare* ]]; then
342- test_single_dynamo_benchmark " amp " " $suite " " $shard_id " --training --amp " $@ "
382+ test_single_dynamo_benchmark " training " " $suite " " $shard_id " --training --amp " $@ "
343383 elif [[ " ${TEST_CONFIG} " == * perf* ]]; then
344- # Performance test training only, for float32 and amp
345- test_single_dynamo_benchmark " amp" " $suite " " $shard_id " --training --dtypes=amp --output-dir=" $TEST_REPORTS_DIR " /amp " $@ "
346- test_single_dynamo_benchmark " float32" " $suite " " $shard_id " --training --dtypes=float32 --output-dir=" $TEST_REPORTS_DIR " /float32 " $@ "
384+ # Performance test training only
385+ test_single_dynamo_benchmark " training" " $suite " " $shard_id " --training " $@ "
347386 else
348387 # Check inference with --float32
349388 test_single_dynamo_benchmark " inference" " $suite " " $shard_id " --float32 " $@ "
@@ -532,6 +571,10 @@ test_vulkan() {
532571}
533572
534573test_distributed () {
574+ # Smuggle a few multi-gpu tests here so that we don't have to request another large node
575+ echo " Testing multi_gpu tests in test_torchinductor"
576+ pytest test/inductor/test_torchinductor.py -k test_multi_gpu
577+
535578 echo " Testing distributed python tests"
536579 time python test/run_test.py --distributed-tests --shard " $SHARD_NUMBER " " $NUM_TEST_SHARDS " --verbose
537580 assert_git_not_dirty
@@ -803,12 +846,6 @@ test_executorch() {
803846 assert_git_not_dirty
804847}
805848
806- # TODO: Include this in the Docker image
807- if [[ " ${TEST_CONFIG} " == * _perf* ]]; then
808- install_matplotlib
809- install_tabulate
810- fi
811-
812849if ! [[ " ${BUILD_ENVIRONMENT} " == * libtorch* || " ${BUILD_ENVIRONMENT} " == * -bazel-* || " ${BUILD_ENVIRONMENT} " == * -tsan* ]]; then
813850 (cd test && python -c " import torch; print(torch.__config__.show())" )
814851 (cd test && python -c " import torch; print(torch.__config__.parallel_info())" )
@@ -848,7 +885,8 @@ elif [[ "${TEST_CONFIG}" == *dynamo* && "${SHARD_NUMBER}" == 2 && $NUM_TEST_SHAR
848885elif [[ " ${TEST_CONFIG} " == * huggingface* ]]; then
849886 install_torchvision
850887 install_huggingface
851- test_dynamo_benchmark huggingface " "
888+ id=$(( SHARD_NUMBER- 1 ))
889+ test_dynamo_benchmark huggingface " $id "
852890elif [[ " ${TEST_CONFIG} " == * timm* ]]; then
853891 install_torchvision
854892 install_timm
@@ -862,12 +900,13 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
862900 fi
863901 install_torchtext
864902 install_torchvision
903+ id=$(( SHARD_NUMBER- 1 ))
865904 if [[ " ${TEST_CONFIG} " == * inductor_torchbench_smoketest_perf* ]]; then
866905 checkout_install_torchbench hf_Bert hf_Albert timm_efficientdet timm_vision_transformer
867906 PYTHONPATH=$( pwd) /torchbench test_inductor_torchbench_smoketest_perf
868907 else
869908 checkout_install_torchbench
870- PYTHONPATH=$( pwd) /torchbench test_dynamo_benchmark torchbench " "
909+ PYTHONPATH=$( pwd) /torchbench test_dynamo_benchmark torchbench " $id "
871910 fi
872911elif [[ " ${TEST_CONFIG} " == * inductor* && " ${SHARD_NUMBER} " == 1 ]]; then
873912 install_torchvision
@@ -902,8 +941,6 @@ elif [[ "${BUILD_ENVIRONMENT}" == *-tsan* ]]; then
902941 test_libtorch || true
903942elif [[ " ${TEST_CONFIG} " = docs_test ]]; then
904943 test_docs_test
905- elif [[ " ${TEST_CONFIG} " == * functorch* ]]; then
906- test_functorch
907944else
908945 install_torchvision
909946 install_monkeytype