-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnormalize_qpy_docs.py
More file actions
72 lines (57 loc) · 1.99 KB
/
normalize_qpy_docs.py
File metadata and controls
72 lines (57 loc) · 1.99 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#!/usr/bin/env python3
"""
Normalize imported QuecPython markdown docs by removing known site-export noise.
"""
from __future__ import annotations
import argparse
import os
import re
# Regular expressions that identify site-export noise. Each pattern is tested
# against one line at a time; a line matched by any pattern is dropped from
# the normalized output.
REMOVE_PATTERNS = [
    r".*Edit this page.*",  # any line containing "Edit this page"
    r".*toscode\.gitee\.com.*",  # any line containing "toscode.gitee.com"
    r".*mermaid\.initialize.*",  # any line containing "mermaid.initialize"
]
def normalize_text(text: str, patterns: list[str] | None = None) -> str:
    """Strip site-export noise from one markdown document.

    Lines matching any removal pattern are dropped, trailing whitespace is
    trimmed from every remaining line, runs of blank lines are collapsed to a
    single blank line, and blank lines at the start and end of the document
    are removed.

    Args:
        text: Raw markdown content.
        patterns: Regular expressions tested with ``re.match`` against each
            line; a matching line is removed. When omitted, the module-level
            ``REMOVE_PATTERNS`` list is used.

    Returns:
        The cleaned text, always terminated by exactly one newline (an empty
        input therefore yields a lone newline).
    """
    active = REMOVE_PATTERNS if patterns is None else patterns
    # Compile once per call instead of re-resolving each pattern on every line.
    compiled = [re.compile(pattern) for pattern in active]

    kept = [
        line.rstrip()
        for line in text.splitlines()
        if not any(rx.match(line) for rx in compiled)
    ]

    # Collapse excessive blank lines.
    normalized = re.sub(r"\n{3,}", "\n\n", "\n".join(kept))

    # Trim only newlines at the edges. Every line is already right-stripped,
    # so this is enough for the tail; a bare strip() would additionally eat
    # the indentation of the first content line, which is significant in
    # Markdown (indented code blocks, nested list items).
    return normalized.strip("\n") + "\n"
def process_file(src: str, dst: str) -> None:
    """Read ``src``, normalize its text, and write the result to ``dst``.

    Args:
        src: Path of the markdown file to read. It is decoded as UTF-8 and
            bytes that are not valid UTF-8 are silently dropped.
        dst: Path of the cleaned file to write (UTF-8). Missing parent
            directories are created.
    """
    with open(src, "r", encoding="utf-8", errors="ignore") as f:
        text = f.read()
    cleaned = normalize_text(text)

    # os.path.dirname() returns "" for a bare filename, and os.makedirs("")
    # raises FileNotFoundError, so only create a parent when there is one.
    parent = os.path.dirname(dst)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(dst, "w", encoding="utf-8") as f:
        f.write(cleaned)
def main() -> int:
    """Command-line entry point: normalize every ``.md`` file under ``--src``.

    The source tree is walked recursively and each markdown file (extension
    matched case-insensitively) is written to the same relative path under
    ``--out``.

    Returns:
        0 on success, 2 when the source directory does not exist.
    """
    import sys  # local import: only needed to report the error on stderr

    parser = argparse.ArgumentParser(description="Normalize QuecPython markdown docs.")
    parser.add_argument("--src", required=True, help="Source directory containing .md files.")
    parser.add_argument("--out", required=True, help="Output directory for cleaned files.")
    args = parser.parse_args()

    if not os.path.isdir(args.src):
        # Diagnostics go to stderr so stdout carries only the summary line.
        print(f"Source directory not found: {args.src}", file=sys.stderr)
        return 2

    out_root = os.path.abspath(args.out)
    count = 0
    for dirpath, dirnames, filenames in os.walk(args.src):
        # If the output directory lives inside the source tree, do not descend
        # into it; otherwise a re-run would pick up previously generated files
        # and nest them one level deeper each time. In-place pruning of
        # dirnames is honoured by os.walk in its default top-down mode.
        dirnames[:] = [
            d for d in dirnames
            if os.path.abspath(os.path.join(dirpath, d)) != out_root
        ]
        for name in filenames:
            if not name.lower().endswith(".md"):
                continue
            src_file = os.path.join(dirpath, name)
            rel = os.path.relpath(src_file, args.src)
            process_file(src_file, os.path.join(args.out, rel))
            count += 1

    print(f"Normalized markdown files: {count}")
    return 0
# Run the CLI only when executed as a script, and hand main()'s return value
# to the interpreter as the process exit status.
if __name__ == "__main__":
    exit_code = main()
    raise SystemExit(exit_code)