Commit 6d2d257 (2 parents: 14e7993 + 5cc58a7)

Update on "[FSDP][optim_state_dict][9/N] Rewrite the all-gather flow of optimizer state to support older GPUs"

[ghstack-poisoned]
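For orientation, the commit title refers to all-gathering sharded optimizer state across ranks. Below is a minimal illustrative sketch of that general pattern only, not this commit's implementation; gather_full_state is a hypothetical helper, and equal-sized flat shards per rank are an assumption.

# Hedged sketch of the generic all-gather pattern for sharded optimizer
# state (e.g. Adam's exp_avg). Each rank holds one equal-sized flat shard;
# all_gather reconstructs the full tensor on every rank.
import torch
import torch.distributed as dist

def gather_full_state(local_shard: torch.Tensor) -> torch.Tensor:
    world_size = dist.get_world_size()
    # One receive buffer per rank; dist.all_gather fills them in rank order.
    buffers = [torch.empty_like(local_shard) for _ in range(world_size)]
    dist.all_gather(buffers, local_shard)
    return torch.cat(buffers)  # concatenated, unsharded state tensor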

File tree: 161 files changed, +1962 −1325 lines

.github/ci_commit_pins/xla.txt

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-d43a14a9adb89e6280b311f0513da2d3f3b0c618
+2ffe4a04df9d498e250153d931cadf9d92268510

.github/merge_rules.yaml

Lines changed: 11 additions & 0 deletions
@@ -350,6 +350,17 @@
   - Lint
   - pull
 
+- name: ROCm
+  patterns:
+  - '**rocm**'
+  - '**hip**'
+  approved_by:
+  - jeffdaily
+  mandatory_checks_name:
+  - EasyCLA
+  - Lint
+  - pull
+
 - name: superuser
   patterns:
   - '*'
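The '**rocm**' and '**hip**' entries are filename patterns that a PR's changed files are matched against. A minimal sketch of such matching, assuming fnmatch-style glob semantics (an assumption about the rule engine, and a made-up file path):

# Hedged sketch: does a changed file fall under the ROCm rule's patterns?
from fnmatch import fnmatch

patterns = ["**rocm**", "**hip**"]
changed_file = "aten/src/ATen/native/hip/SortingKernels.hip"  # hypothetical

matches = any(fnmatch(changed_file, p) for p in patterns)
print(matches)  # True: the path contains "hip"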

.github/scripts/get_workflow_job_id.py

Lines changed: 97 additions & 50 deletions
@@ -2,18 +2,74 @@
 # workflow. GitHub does not provide this information to workflow runs, so we
 # need to figure it out based on what they *do* provide.
 
-import requests
-import os
 import argparse
+import json
+import os
+import re
+import sys
+import urllib
+import urllib.parse
+
+from typing import Any, Callable, Dict, List, Tuple, Optional
+from urllib.request import Request, urlopen
+
+def parse_json_and_links(conn: Any) -> Tuple[Any, Dict[str, Dict[str, str]]]:
+    links = {}
+    # Extract links which GH uses for pagination
+    # see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Link
+    if "Link" in conn.headers:
+        for elem in re.split(", *<", conn.headers["Link"]):
+            try:
+                url, params_ = elem.split(";", 1)
+            except ValueError:
+                continue
+            url = urllib.parse.unquote(url.strip("<> "))
+            qparams = urllib.parse.parse_qs(params_.strip(), separator=";")
+            params = {k: v[0].strip('"') for k, v in qparams.items() if type(v) is list and len(v) > 0}
+            params["url"] = url
+            if "rel" in params:
+                links[params["rel"]] = params
+
+    return json.load(conn), links
 
-def handle_bad_status(response: requests.Response) -> None:
-    if response.status_code != 200:
+def fetch_url(url: str, *,
+              headers: Optional[Dict[str, str]] = None,
+              reader: Callable[[Any], Any] = lambda x: x.read()) -> Any:
+    if headers is None:
+        headers = {}
+    try:
+        with urlopen(Request(url, headers=headers)) as conn:
+            return reader(conn)
+    except urllib.error.HTTPError as err:
         exception_message = (
             "Is github alright?",
-            f"Recieved status code '{response.status_code}' when attempting to retrieve runs:\n",
-            f"{response.content.decode()}"
+            f"Recieved status code '{err.code}' when attempting to retrieve {url}:\n",
+            f"{err.reason}\n\nheaders={err.headers}"
         )
-        raise RuntimeError(exception_message)
+        raise RuntimeError(exception_message) from err
+
+def parse_args() -> Any:
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "workflow_run_id", help="The id of the workflow run, should be GITHUB_RUN_ID"
+    )
+    parser.add_argument(
+        "runner_name",
+        help="The name of the runner to retrieve the job id, should be RUNNER_NAME",
+    )
+
+    return parser.parse_args()
+
+
+def fetch_jobs(url: str, headers: Dict[str, str]) -> List[Dict[str, str]]:
+    response, links = fetch_url(url, headers=headers, reader=parse_json_and_links)
+    jobs = response["jobs"]
+    assert type(jobs) is list
+    while "next" in links.keys():
+        response, links = fetch_url(links["next"]["url"], headers=headers, reader=parse_json_and_links)
+        jobs.extend(response["jobs"])
+
+    return jobs
 
 
 # Our strategy is to retrieve the parent workflow run, then filter its jobs on
@@ -29,46 +85,37 @@ def handle_bad_status(response: requests.Response) -> None:
 # since only one job can be scheduled on a runner at a time, we know that
 # looking for RUNNER_NAME will uniquely identify the job we're currently
 # running.
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    "workflow_run_id", help="The id of the workflow run, should be GITHUB_RUN_ID"
-)
-parser.add_argument(
-    "runner_name",
-    help="The name of the runner to retrieve the job id, should be RUNNER_NAME",
-)
-
-args = parser.parse_args()
-
-
-# From https://docs.github.com/en/actions/learn-github-actions/environment-variables
-PYTORCH_REPO = os.environ.get("GITHUB_REPOSITORY", "pytorch/pytorch")
-PYTORCH_GITHUB_API = f"https://api.github.com/repos/{PYTORCH_REPO}"
-GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]
-REQUEST_HEADERS = {
-    "Accept": "application/vnd.github.v3+json",
-    "Authorization": "token " + GITHUB_TOKEN,
-}
-
-response = requests.get(
-    f"{PYTORCH_GITHUB_API}/actions/runs/{args.workflow_run_id}/jobs?per_page=100",
-    headers=REQUEST_HEADERS,
-)
-handle_bad_status(response)
-
-jobs = response.json()["jobs"]
-while "next" in response.links.keys():
-    response = requests.get(response.links["next"]["url"], headers=REQUEST_HEADERS)
-    handle_bad_status(response)
-    jobs.extend(response.json()["jobs"])
-
-# Sort the jobs list by start time, in descending order. We want to get the most
-# recently scheduled job on the runner.
-jobs.sort(key=lambda job: job["started_at"], reverse=True)
-
-for job in jobs:
-    if job["runner_name"] == args.runner_name:
-        print(job["id"])
-        exit(0)
-
-exit(1)
+
+def find_job_id(args: Any) -> str:
+    # From https://docs.github.com/en/actions/learn-github-actions/environment-variables
+    PYTORCH_REPO = os.environ.get("GITHUB_REPOSITORY", "pytorch/pytorch")
+    PYTORCH_GITHUB_API = f"https://api.github.com/repos/{PYTORCH_REPO}"
+    GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]
+    REQUEST_HEADERS = {
+        "Accept": "application/vnd.github.v3+json",
+        "Authorization": "token " + GITHUB_TOKEN,
+    }
+
+    url = f"{PYTORCH_GITHUB_API}/actions/runs/{args.workflow_run_id}/jobs?per_page=100"
+    jobs = fetch_jobs(url, REQUEST_HEADERS)
+
+    # Sort the jobs list by start time, in descending order. We want to get the most
+    # recently scheduled job on the runner.
+    jobs.sort(key=lambda job: job["started_at"], reverse=True)
+
+    for job in jobs:
+        if job["runner_name"] == args.runner_name:
+            return job["id"]
+
+    raise RuntimeError(f"Can't find job id for runner {args.runner_name}")
+
+def main() -> None:
+    args = parse_args()
+    try:
+        print(find_job_id(args))
+    except Exception as e:
+        print(repr(e), file=sys.stderr)
+        print(f"workflow-{args.workflow_run_id}")
+
+if __name__ == "__main__":
+    main()
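This rewrite drops the external requests dependency in favor of the standard library, so the script now parses GitHub's pagination Link header itself. A standalone sketch of how that parsing behaves on a sample header (the URLs are made up; parse_qs's separator keyword requires a reasonably recent Python 3 patch release):

# Demo of the Link-header parsing logic above on a made-up GitHub
# pagination header; not part of the commit itself.
import re
import urllib.parse

sample = (
    '<https://api.github.com/repos/pytorch/pytorch/actions/runs/1/jobs?page=2>; rel="next", '
    '<https://api.github.com/repos/pytorch/pytorch/actions/runs/1/jobs?page=3>; rel="last"'
)

links = {}
for elem in re.split(", *<", sample):
    try:
        url, params_ = elem.split(";", 1)
    except ValueError:
        continue
    url = urllib.parse.unquote(url.strip("<> "))
    qparams = urllib.parse.parse_qs(params_.strip(), separator=";")
    params = {k: v[0].strip('"') for k, v in qparams.items() if v}
    params["url"] = url
    if "rel" in params:
        links[params["rel"]] = params

print(links["next"]["url"])  # prints the page=2 URL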

.github/workflows/_rocm-test.yml

Lines changed: 0 additions & 1 deletion
@@ -118,7 +118,6 @@ jobs:
           SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
           DOCKER_IMAGE: ${{ inputs.docker-image }}
           XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
-          PYTORCH_JIT_ENABLE_NVFUSER: 1
           PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
           PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
         timeout-minutes: 270

.github/workflows/pull.yml

Lines changed: 1 addition & 1 deletion
@@ -217,7 +217,7 @@ jobs:
       docker-image-name: xla_base
       test-matrix: |
         { include: [
-          { config: "xla", shard: 1, num_shards: 1, runner: "linux.2xlarge" },
+          { config: "xla", shard: 1, num_shards: 1, runner: "linux.4xlarge" },
         ]}
 
   linux-bionic-py3_7-clang8-xla-test:

.github/workflows/unstable.yml

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
+name: unstable
+
+on:
+  push:
+    branches:
+      - master
+      - main
+    tags:
+      - ciflow/unstable/*
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
+
+jobs:
+  # There must be at least one job here to satisfy GitHub action workflow syntax
+  introduction:
+    runs-on: ubuntu-latest
+    continue-on-error: true
+    steps:
+      - name: Introduce PyTorch unstable workflow
+        run: |
+          echo "PyTorch unstable workflow is used to host experimental or flaky jobs"
+          echo " that needs to be run for every commit, but doesn't block PR merging"
+          echo " as part of the stable pull or trunk workflows."
+          echo
+          echo "In addition, a new label called ciflow/unstable can be attached to the"
+          echo " PR to trigger this workflow. That can be done either manually or"
+          echo " automatically using PyTorch auto-label bot."
+          echo
+          echo "Once the jobs are deemed stable enough (% red signal < 20% and TTS < 3h),"
+          echo " they can graduate and move back to pull or trunk."

.jenkins/pytorch/multigpu-test.sh

Lines changed: 1 addition & 0 deletions
@@ -45,4 +45,5 @@ time python test/run_test.py --verbose -i distributed/_shard/test_partial_tensor
 time python test/run_test.py --verbose -i distributed/_shard/test_replicated_tensor
 # Other tests
 time python test/run_test.py --verbose -i test_cuda_primary_ctx
+time python test/run_test.py --verbose -i test_optim -- -k optimizers_with_varying_tensors
 assert_git_not_dirty

BUILD.bazel

Lines changed: 0 additions & 1 deletion
@@ -407,7 +407,6 @@ cc_library(
         "@cuda//:cusolver",
         "@cuda//:nvrtc",
         "@cudnn",
-        "@cudnn_frontend",
    ],
    alwayslink = True,
 )

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
@@ -195,6 +195,9 @@ cmake_dependent_option(
 cmake_dependent_option(
     BUILD_NVFUSER_BENCHMARK "Build C++ binaries for nvfuser benchmarks" OFF
     "USE_CUDA" OFF)
+cmake_dependent_option(
+    USE_EXPERIMENTAL_CUDNN_V8_API "Use experimental cuDNN v8 API" ON
+    "USE_CUDNN" OFF)
 option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
 option(USE_KINETO "Use Kineto profiling library" ON)
 option(USE_CUPTI_SO "Use CUPTI as a shared library" ON)
@@ -802,6 +805,7 @@ if(NOT MSVC)
   append_cxx_flag_if_supported("-Werror=non-virtual-dtor" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Werror=braced-scalar-init" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Werror=range-loop-construct" CMAKE_CXX_FLAGS)
+  append_cxx_flag_if_supported("-Werror=bool-operation" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Winconsistent-missing-override" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wnarrowing" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wno-missing-field-initializers" CMAKE_CXX_FLAGS)

WORKSPACE

Lines changed: 0 additions & 6 deletions
@@ -203,12 +203,6 @@ new_local_repository(
     path = "/usr/",
 )
 
-new_local_repository(
-    name = "cudnn_frontend",
-    build_file = "@//third_party:cudnn_frontend.BUILD",
-    path = "third_party/cudnn_frontend/",
-)
-
 local_repository(
     name = "com_github_google_flatbuffers",
     path = "third_party/flatbuffers",
