Skip to content

Commit 32ac38f

Browse files
clee2000 and pytorchmergebot
authored and committed
[lint] workflow consistency linter to look at all files instead of just changed files (#165171)
As in title If you change only one workflow file, lintrunner (default arg, also the one in CI since it only inputs changed files) won't look at other files in the repo, but the sync-tag might come from those other files This makes it so that it looks at all workflow files so it will catch those failures Also change output line so it prints which file + which job it is different from Pros: catches errors Cons: unusual behavior (getting around what lintrunner says the linter should run on) Pull Request resolved: #165171 Approved by: https://github.com/malfet, https://github.com/izaitsevfb, https://github.com/atalman
1 parent c9b49e5 commit 32ac38f

File tree

1 file changed

+58
-32
lines changed

1 file changed

+58
-32
lines changed

tools/linter/adapters/workflow_consistency_linter.py

Lines changed: 58 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
from yaml import dump, load
1717

1818

19+
REPO_ROOT = Path(__file__).resolve().parent.parent.parent.parent
20+
21+
1922
if TYPE_CHECKING:
2023
from collections.abc import Iterable
2124

@@ -59,7 +62,13 @@ def is_workflow(yaml: Any) -> bool:
5962
return yaml.get("jobs") is not None
6063

6164

62-
def print_lint_message(path: Path, job: dict[str, Any], sync_tag: str) -> None:
65+
def print_lint_message(
66+
path: Path,
67+
job: dict[str, Any],
68+
sync_tag: str,
69+
baseline_path: Path,
70+
baseline_job_id: str,
71+
) -> None:
6372
job_id = next(iter(job.keys()))
6473
with open(path) as f:
6574
lines = f.readlines()
@@ -77,11 +86,30 @@ def print_lint_message(path: Path, job: dict[str, Any], sync_tag: str) -> None:
7786
name="workflow-inconsistency",
7887
original=None,
7988
replacement=None,
80-
description=f"Job doesn't match other jobs with sync-tag: '{sync_tag}'",
89+
description=f"Job doesn't match other job {baseline_job_id} in file {baseline_path} with sync-tag: '{sync_tag}'",
8190
)
8291
print(json.dumps(lint_message._asdict()), flush=True)
8392

8493

94+
def get_jobs_with_sync_tag(
    job: dict[str, Any],
    job_id: str | None = None,
) -> tuple[str, str, dict[str, Any]] | None:
    """Extract the sync-tag info from a workflow job, normalizing the job dict.

    Returns ``(sync_tag, job_id, job)`` for jobs that declare a
    ``with.sync-tag``, or ``None`` for jobs that don't.  The job dict is
    mutated in place: fields that are allowed to differ between synced jobs
    (``if`` and ``with.test-matrix``) are removed so the remaining dicts can
    be compared directly.

    Args:
        job: the job body (the value under a workflow's ``jobs`` mapping).
        job_id: the job's key in the ``jobs`` mapping.  Optional for
            backward compatibility; see the fallback note below.
    """
    sync_tag = job.get("with", {}).get("sync-tag")
    if sync_tag is None:
        return None

    if job_id is None:
        # NOTE(review): the original returned a bare ``job_id`` name that was
        # neither a parameter nor a local -- it only resolved because the
        # caller's module-level loop bound a global ``job_id``.  Keep that
        # lookup as a fallback so existing callers are unchanged, but prefer
        # passing job_id explicitly.
        job_id = globals().get("job_id", "")

    # remove the "if" field, which we allow to be different between jobs
    # (since you might have different triggering conditions on pull vs.
    # trunk, say.)
    if "if" in job:
        del job["if"]

    # same is true for ['with']['test-matrix']
    if "test-matrix" in job.get("with", {}):
        del job["with"]["test-matrix"]
    return (sync_tag, job_id, job)
112+
85113
if __name__ == "__main__":
86114
parser = argparse.ArgumentParser(
87115
description="workflow consistency linter.",
@@ -94,41 +122,39 @@ def print_lint_message(path: Path, job: dict[str, Any], sync_tag: str) -> None:
94122
)
95123
args = parser.parse_args()
96124

97-
# Go through the provided files, aggregating jobs with the same sync tag
125+
# Go through all files, aggregating jobs with the same sync tag
98126
tag_to_jobs = defaultdict(list)
127+
for path in REPO_ROOT.glob(".github/workflows/*"):
128+
if not path.is_file() or path.suffix not in {".yml", ".yaml"}:
129+
continue
130+
workflow = load_yaml(path)
131+
if not is_workflow(workflow):
132+
continue
133+
clean_path = path.relative_to(REPO_ROOT)
134+
jobs = workflow.get("jobs", {})
135+
for job_id, job in jobs.items():
136+
res = get_jobs_with_sync_tag(job)
137+
if res is None:
138+
continue
139+
sync_tag, job_id, job_dict = res
140+
tag_to_jobs[sync_tag].append((clean_path, job_id, job_dict))
141+
142+
# Check the files passed as arguments
99143
for path in args.filenames:
100144
workflow = load_yaml(Path(path))
101145
jobs = workflow["jobs"]
102146
for job_id, job in jobs.items():
103-
try:
104-
sync_tag = job["with"]["sync-tag"]
105-
except KeyError:
147+
res = get_jobs_with_sync_tag(job)
148+
if res is None:
106149
continue
107-
108-
# remove the "if" field, which we allow to be different between jobs
109-
# (since you might have different triggering conditions on pull vs.
110-
# trunk, say.)
111-
if "if" in job:
112-
del job["if"]
113-
114-
# same is true for ['with']['test-matrix']
115-
if "test-matrix" in job.get("with", {}):
116-
del job["with"]["test-matrix"]
117-
118-
tag_to_jobs[sync_tag].append((path, {job_id: job}))
119-
120-
# For each sync tag, check that all the jobs have the same code.
121-
for sync_tag, path_and_jobs in tag_to_jobs.items():
122-
baseline_path, baseline_dict = path_and_jobs.pop()
123-
baseline_str = dump(baseline_dict)
124-
125-
printed_baseline = False
126-
127-
for path, job_dict in path_and_jobs:
150+
sync_tag, job_id, job_dict = res
128151
job_str = dump(job_dict)
129-
if baseline_str != job_str:
130-
print_lint_message(path, job_dict, sync_tag)
131152

132-
if not printed_baseline:
133-
print_lint_message(baseline_path, baseline_dict, sync_tag)
134-
printed_baseline = True
153+
# For each sync tag, check that all the jobs have the same code.
154+
for baseline_path, baseline_job_id, baseline_dict in tag_to_jobs[sync_tag]:
155+
baseline_str = dump(baseline_dict)
156+
157+
if job_id != baseline_job_id or job_str != baseline_str:
158+
print_lint_message(
159+
path, job_dict, sync_tag, baseline_path, baseline_job_id
160+
)

0 commit comments

Comments
 (0)