# workflow. GitHub does not provide this information to workflow runs, so we
# need to figure it out based on what they *do* provide.

- import requests
- import os
import argparse
+ import json
+ import os
+ import re
+ import sys
+ import urllib
+ import urllib.parse
+
+ from typing import Any, Callable, Dict, List, Tuple, Optional
+ from urllib.request import Request, urlopen
+
+ def parse_json_and_links(conn: Any) -> Tuple[Any, Dict[str, Dict[str, str]]]:
+     links = {}
+     # Extract links which GH uses for pagination
+     # see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Link
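+     # e.g. (hypothetical URLs):
+     #   Link: <https://api.github.com/x?page=2>; rel="next", <https://api.github.com/x?page=3>; rel="last"
+     # parses into {"next": {"url": "https://api.github.com/x?page=2", "rel": "next"}, ...}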
20+ if "Link" in conn .headers :
21+ for elem in re .split (", *<" , conn .headers ["Link" ]):
22+ try :
23+ url , params_ = elem .split (";" , 1 )
24+ except ValueError :
25+ continue
26+ url = urllib .parse .unquote (url .strip ("<> " ))
27+ qparams = urllib .parse .parse_qs (params_ .strip (), separator = ";" )
28+ params = {k : v [0 ].strip ('"' ) for k , v in qparams .items () if type (v ) is list and len (v ) > 0 }
29+ params ["url" ] = url
30+ if "rel" in params :
31+ links [params ["rel" ]] = params
32+
33+ return json .load (conn ), links
834
- def handle_bad_status(response: requests.Response) -> None:
-     if response.status_code != 200:
+ def fetch_url(url: str, *,
+               headers: Optional[Dict[str, str]] = None,
+               reader: Callable[[Any], Any] = lambda x: x.read()) -> Any:
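+     # `reader` lets callers decode the open connection however they need;
+     # the default simply returns the raw response bytes.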
+     if headers is None:
+         headers = {}
+     try:
+         with urlopen(Request(url, headers=headers)) as conn:
+             return reader(conn)
+     except urllib.error.HTTPError as err:
        exception_message = (
            "Is github alright?",
13- f"Recieved status code '{ response . status_code } ' when attempting to retrieve runs :\n " ,
14- f"{ response . content . decode () } "
46+ f"Recieved status code '{ err . code } ' when attempting to retrieve { url } :\n " ,
47+ f"{ err . reason } \n \n headers= { err . headers } "
        )
-         raise RuntimeError(exception_message)
+         raise RuntimeError(exception_message) from err
+
+ def parse_args() -> Any:
+     parser = argparse.ArgumentParser()
+     parser.add_argument(
+         "workflow_run_id", help="The id of the workflow run, should be GITHUB_RUN_ID"
+     )
+     parser.add_argument(
+         "runner_name",
+         help="The name of the runner to retrieve the job id, should be RUNNER_NAME",
+     )
+
+     return parser.parse_args()
+
+
+ def fetch_jobs(url: str, headers: Dict[str, str]) -> List[Dict[str, str]]:
+     response, links = fetch_url(url, headers=headers, reader=parse_json_and_links)
+     jobs = response["jobs"]
+     assert type(jobs) is list
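+     # GitHub caps per_page at 100, so longer job lists arrive paginated;
+     # keep following the rel="next" link until it disappears.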
68+ while "next" in links .keys ():
69+ response , links = fetch_url (links ["next" ]["url" ], headers = headers , reader = parse_json_and_links )
70+ jobs .extend (response ["jobs" ])
71+
72+ return jobs
1773
1874
1975# Our strategy is to retrieve the parent workflow run, then filter its jobs on
@@ -29,46 +85,37 @@ def handle_bad_status(response: requests.Response) -> None:
# since only one job can be scheduled on a runner at a time, we know that
# looking for RUNNER_NAME will uniquely identify the job we're currently
# running.
- parser = argparse.ArgumentParser()
- parser.add_argument(
-     "workflow_run_id", help="The id of the workflow run, should be GITHUB_RUN_ID"
- )
- parser.add_argument(
-     "runner_name",
-     help="The name of the runner to retrieve the job id, should be RUNNER_NAME",
- )
-
- args = parser.parse_args()
-
-
- # From https://docs.github.com/en/actions/learn-github-actions/environment-variables
- PYTORCH_REPO = os.environ.get("GITHUB_REPOSITORY", "pytorch/pytorch")
- PYTORCH_GITHUB_API = f"https://api.github.com/repos/{PYTORCH_REPO}"
- GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]
- REQUEST_HEADERS = {
-     "Accept": "application/vnd.github.v3+json",
-     "Authorization": "token " + GITHUB_TOKEN,
- }
-
- response = requests.get(
-     f"{PYTORCH_GITHUB_API}/actions/runs/{args.workflow_run_id}/jobs?per_page=100",
-     headers=REQUEST_HEADERS,
- )
- handle_bad_status(response)
-
- jobs = response.json()["jobs"]
- while "next" in response.links.keys():
-     response = requests.get(response.links["next"]["url"], headers=REQUEST_HEADERS)
-     handle_bad_status(response)
-     jobs.extend(response.json()["jobs"])
-
- # Sort the jobs list by start time, in descending order. We want to get the most
- # recently scheduled job on the runner.
- jobs.sort(key=lambda job: job["started_at"], reverse=True)
-
- for job in jobs:
-     if job["runner_name"] == args.runner_name:
-         print(job["id"])
-         exit(0)
-
- exit(1)
+
+ def find_job_id(args: Any) -> str:
+     # From https://docs.github.com/en/actions/learn-github-actions/environment-variables
+     PYTORCH_REPO = os.environ.get("GITHUB_REPOSITORY", "pytorch/pytorch")
+     PYTORCH_GITHUB_API = f"https://api.github.com/repos/{PYTORCH_REPO}"
+     GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]
+     REQUEST_HEADERS = {
+         "Accept": "application/vnd.github.v3+json",
+         "Authorization": "token " + GITHUB_TOKEN,
+     }
+
+     url = f"{PYTORCH_GITHUB_API}/actions/runs/{args.workflow_run_id}/jobs?per_page=100"
+     jobs = fetch_jobs(url, REQUEST_HEADERS)
+
+     # Sort the jobs list by start time, in descending order. We want to get the most
+     # recently scheduled job on the runner.
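+     # (started_at is an ISO-8601 UTC timestamp, so sorting the raw strings
+     # is already chronological.)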
+     jobs.sort(key=lambda job: job["started_at"], reverse=True)
+
+     for job in jobs:
+         if job["runner_name"] == args.runner_name:
+             return job["id"]
+
+     raise RuntimeError(f"Can't find job id for runner {args.runner_name}")
+
+ def main() -> None:
+     args = parse_args()
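+     # On any failure, emit a stable per-run fallback identifier rather than
+     # exiting non-zero, so callers always receive a usable string.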
+     try:
+         print(find_job_id(args))
+     except Exception as e:
+         print(repr(e), file=sys.stderr)
+         print(f"workflow-{args.workflow_run_id}")
+
+ if __name__ == "__main__":
+     main()
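
For illustration, a minimal self-contained sketch of what parse_json_and_links
extracts from a paginated response (FakeConn and the header value are invented
for the example; it assumes the function above is in scope, e.g. pasted into
the same file):

    class FakeConn:
        # Mimics just the parts of the urlopen response the function touches.
        headers = {
            "Link": '<https://api.github.com/x?page=2>; rel="next", '
                    '<https://api.github.com/x?page=3>; rel="last"'
        }

        def read(self):  # json.load() consumes the body via .read()
            return b'{"jobs": []}'

    body, links = parse_json_and_links(FakeConn())
    assert body == {"jobs": []}
    assert links["next"]["url"] == "https://api.github.com/x?page=2"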