33Check the output of running Sphinx in nit-picky mode (missing references).
44"""
55import argparse
6- import csv
6+ import itertools
77import os
88import re
9+ import subprocess
910import sys
1011from pathlib import Path
12+ from typing import TextIO
1113
1214# Exclude these whether they're dirty or clean,
1315# because they trigger a rebuild of dirty files.
2426 "venv" ,
2527}
2628
27- PATTERN = re .compile (r"(?P<file>[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)" )
29+ # Regex pattern to match the parts of a Sphinx warning
30+ WARNING_PATTERN = re .compile (
31+ r"(?P<file>([A-Za-z]:[\\/])?[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)"
32+ )
2833
34+ # Regex pattern to match the line numbers in a Git unified diff
35+ DIFF_PATTERN = re .compile (
36+ r"^@@ -(?P<linea>\d+)(?:,(?P<removed>\d+))? \+(?P<lineb>\d+)(?:,(?P<added>\d+))? @@" ,
37+ flags = re .MULTILINE ,
38+ )
2939
30- def check_and_annotate (warnings : list [str ], files_to_check : str ) -> None :
40+
41+ def get_diff_files (ref_a : str , ref_b : str , filter_mode : str = "" ) -> set [Path ]:
42+ """List the files changed between two Gif refs, filtered by change type."""
43+ added_files_result = subprocess .run (
44+ [
45+ "git" ,
46+ "diff" ,
47+ f"--diff-filter={ filter_mode } " ,
48+ "--name-only" ,
49+ f"{ ref_a } ...{ ref_b } " ,
50+ "--" ,
51+ ],
52+ stdout = subprocess .PIPE ,
53+ check = True ,
54+ text = True ,
55+ encoding = "UTF-8" ,
56+ )
57+
58+ added_files = added_files_result .stdout .strip ().split ("\n " )
59+ return {Path (file .strip ()) for file in added_files if file .strip ()}
60+
61+
62+ def get_diff_lines (ref_a : str , ref_b : str , file : Path ) -> list [int ]:
63+ """List the lines changed between two Gif refs for a specific file."""
64+ diff_output = subprocess .run (
65+ [
66+ "git" ,
67+ "diff" ,
68+ "--unified=0" ,
69+ f"{ ref_a } ...{ ref_b } " ,
70+ "--" ,
71+ str (file ),
72+ ],
73+ stdout = subprocess .PIPE ,
74+ check = True ,
75+ text = True ,
76+ encoding = "UTF-8" ,
77+ )
78+
79+ # Scrape line offsets + lengths from diff and convert to line numbers
80+ line_matches = DIFF_PATTERN .finditer (diff_output .stdout )
81+ line_match_values = [
82+ line_match .groupdict (default = 1 ) for line_match in line_matches
83+ ]
84+ line_ints = [
85+ (int (match_value ["lineb" ]), int (match_value ["added" ]))
86+ for match_value in line_match_values
87+ ]
88+ line_ranges = [
89+ range (line_b , line_b + added ) for line_b , added in line_ints
90+ ]
91+ line_numbers = list (itertools .chain (* line_ranges ))
92+
93+ return line_numbers
94+
95+
96+ def get_para_line_numbers (file_obj : TextIO ) -> list [list [int ]]:
97+ """Get the line numbers of text in a file object, grouped by paragraph."""
98+ paragraphs = []
99+ prev_line = None
100+ for lineno , line in enumerate (file_obj ):
101+ lineno = lineno + 1
102+ if prev_line is None or (line .strip () and not prev_line .strip ()):
103+ paragraph = [lineno - 1 ]
104+ paragraphs .append (paragraph )
105+ paragraph .append (lineno )
106+ prev_line = line
107+ return paragraphs
108+
109+
110+ def parse_and_filter_warnings (
111+ warnings : list [str ], files : set [Path ]
112+ ) -> list [re .Match [str ]]:
113+ """Get the warnings matching passed files and parse them with regex."""
114+ filtered_warnings = [
115+ warning
116+ for warning in warnings
117+ if any (str (file ) in warning for file in files )
118+ ]
119+ warning_matches = [
120+ WARNING_PATTERN .fullmatch (warning .strip ())
121+ for warning in filtered_warnings
122+ ]
123+ non_null_matches = [warning for warning in warning_matches if warning ]
124+ return non_null_matches
125+
126+
127+ def process_touched_warnings (
128+ warnings : list [str ], ref_a : str , ref_b : str
129+ ) -> list [re .Match [str ]]:
130+ """Filter a list of Sphinx warnings to those affecting touched lines."""
131+ added_files , modified_files = tuple (
132+ get_diff_files (ref_a , ref_b , filter_mode = mode ) for mode in ("A" , "M" )
133+ )
134+
135+ warnings_added = parse_and_filter_warnings (warnings , added_files )
136+ warnings_modified = parse_and_filter_warnings (warnings , modified_files )
137+
138+ modified_files_warned = {
139+ file
140+ for file in modified_files
141+ if any (str (file ) in warning ["file" ] for warning in warnings_modified )
142+ }
143+
144+ warnings_touched = warnings_added .copy ()
145+ for file in modified_files_warned :
146+ diff_lines = get_diff_lines (ref_a , ref_b , file )
147+ with file .open (encoding = "UTF-8" ) as file_obj :
148+ paragraphs = get_para_line_numbers (file_obj )
149+ touched_paras = [
150+ para_lines
151+ for para_lines in paragraphs
152+ if set (diff_lines ) & set (para_lines )
153+ ]
154+ touched_para_lines = set (itertools .chain (* touched_paras ))
155+ warnings_infile = [
156+ warning
157+ for warning in warnings_modified
158+ if str (file ) in warning ["file" ]
159+ ]
160+ warnings_touched += [
161+ warning
162+ for warning in warnings_infile
163+ if int (warning ["line" ]) in touched_para_lines
164+ ]
165+
166+ return warnings_touched
167+
168+
169+ def check_and_annotate_changed (
170+ warnings : list [str ], ref_a : str = "main" , ref_b : str = "HEAD"
171+ ) -> None :
31172 """
32- Convert Sphinx warning messages to GitHub Actions.
173+ Convert Sphinx warning messages to GitHub Actions for changed paragraphs .
33174
34175 Converts lines like:
35176 .../Doc/library/cgi.rst:98: WARNING: reference target not found
36177 to:
37178 ::warning file=.../Doc/library/cgi.rst,line=98::reference target not found
38179
39- Non-matching lines are echoed unchanged.
40-
41- see:
180+ See:
42181 https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
43182 """
44- files_to_check = next (csv .reader ([files_to_check ]))
45- for warning in warnings :
46- if any (filename in warning for filename in files_to_check ):
47- if match := PATTERN .fullmatch (warning ):
48- print ("::warning file={file},line={line}::{msg}" .format_map (match ))
183+ warnings_touched = process_touched_warnings (warnings , ref_a , ref_b )
184+ print ("Emitting doc warnings matching modified lines:" )
185+ for warning in warnings_touched :
186+ print (warning [0 ])
187+ print ("::warning file={file},line={line}::{msg}" .format_map (warning ))
188+ if not warnings_touched :
189+ print ("None" )
49190
50191
51192def fail_if_regression (
@@ -68,7 +209,7 @@ def fail_if_regression(
68209 print (filename )
69210 for warning in warnings :
70211 if filename in warning :
71- if match := PATTERN .fullmatch (warning ):
212+ if match := WARNING_PATTERN .fullmatch (warning ):
72213 print (" {line}: {msg}" .format_map (match ))
73214 return - 1
74215 return 0
@@ -94,9 +235,10 @@ def fail_if_improved(
94235def main () -> int :
95236 parser = argparse .ArgumentParser ()
96237 parser .add_argument (
97- "--check-and-annotate" ,
98- help = "Comma-separated list of files to check, "
99- "and annotate those with warnings on GitHub Actions" ,
238+ "--check-and-annotate-changed" ,
239+ nargs = 2 ,
240+ help = "Annotate lines changed between two refs "
241+ "with warnings on GitHub Actions" ,
100242 )
101243 parser .add_argument (
102244 "--fail-if-regression" ,
@@ -114,7 +256,7 @@ def main() -> int:
114256 wrong_directory_msg = "Must run this script from the repo root"
115257 assert Path ("Doc" ).exists () and Path ("Doc" ).is_dir (), wrong_directory_msg
116258
117- with Path ("Doc/sphinx-warnings.txt" ).open () as f :
259+ with Path ("Doc/sphinx-warnings.txt" ).open (encoding = "UTF-8" ) as f :
118260 warnings = f .read ().splitlines ()
119261
120262 cwd = str (Path .cwd ()) + os .path .sep
@@ -124,15 +266,15 @@ def main() -> int:
124266 if "Doc/" in warning
125267 }
126268
127- with Path ("Doc/tools/.nitignore" ).open () as clean_files :
269+ with Path ("Doc/tools/.nitignore" ).open (encoding = "UTF-8" ) as clean_files :
128270 files_with_expected_nits = {
129271 filename .strip ()
130272 for filename in clean_files
131273 if filename .strip () and not filename .startswith ("#" )
132274 }
133275
134- if args .check_and_annotate :
135- check_and_annotate (warnings , args .check_and_annotate )
276+ if args .check_and_annotate_changed :
277+ check_and_annotate_changed (warnings , * args .check_and_annotate_changed )
136278
137279 if args .fail_if_regression :
138280 exit_code += fail_if_regression (
0 commit comments