Skip to content

Commit 200ef00

Browse files
committed
Added tools/toc.py to generate TOC for .md files (Issue cztomczak#217)
1 parent 2a47413 commit 200ef00

File tree

2 files changed

+155
-0
lines changed

2 files changed

+155
-0
lines changed

tools/doctoc.py

Whitespace-only changes.

tools/toc.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
# Copyright (c) 2016 The CEF Python Authors. All rights reserved.
2+
3+
"""Create Table of contents (TOC) for a single .md file or for a directory.
4+
5+
Usage:
6+
toc.py FILE
7+
toc.py DIR
8+
"""
9+
10+
import os
11+
import sys
12+
import re
13+
import glob
14+
15+
16+
def main():
    """Command line entry point.

    Prints the module usage text and exits with status 0 when no argument
    is given or when a help flag ("-h", "--help", "/?") is present.
    Otherwise the first argument is processed as a directory (when it is
    one) or as a single file, and a short summary is printed.
    """
    argv = sys.argv
    help_flags = ("-h", "--help", "/?")
    if len(argv) == 1 or any(flag in argv for flag in help_flags):
        print(__doc__.strip())
        sys.exit(0)

    target = argv[1]
    if os.path.isdir(target):
        modified, warnings = toc_dir(target)
        unchanged_msg = "No changes to TOCs. Files not modified."
    else:
        modified, warnings = toc_file(target)
        unchanged_msg = "No changes to TOC. File not modified."

    print("Done" if modified else unchanged_msg)
    if warnings:
        print("Warnings: "+str(warnings))
39+
40+
41+
def toc_file(file_):
    """Create, update or remove the TOC of a single file.

    The file is read as UTF-8 text, its line endings are normalized to
    ``os.linesep`` and the result is passed to ``create_toc``. The file
    is rewritten only when the contents returned by ``create_toc`` differ
    from the normalized original.

    Returns a tuple ``(modified, warnings)``: a bool telling whether the
    file was rewritten and the number of warnings reported by
    ``create_toc``.
    """
    # The file is opened in binary mode so that the I/O layer performs no
    # newline translation, but the data is decoded to text before it is
    # processed: the regular expressions used on it are text patterns and
    # cannot be applied to bytes on Python 3.
    with open(file_, "rb") as fo:
        orig_contents = fo.read().decode("utf-8")
    # Fix new lines just in case. Not using Python's "rU",
    # it is causing strange issues.
    orig_contents = re.sub(r"(\r\n|\r|\n)", os.linesep, orig_contents)
    (tocsize, contents, warnings) = create_toc(orig_contents, file_)
    if contents == orig_contents:
        return False, warnings
    with open(file_, "wb") as fo:
        fo.write(contents.encode("utf-8"))
    # A TOC size of 0 on a modified file means an existing TOC was removed.
    tocsize_str = ("TOC size: "+str(tocsize) if tocsize
                   else "TOC removed")
    print("Modified: "+file_+" ("+tocsize_str+")")
    return True, warnings
59+
60+
61+
def toc_dir(dir_):
    """Process every ``*.md`` file found directly inside a directory.

    Each matching file is passed to ``toc_file``. Files are processed in
    sorted order so that the printed output is deterministic.

    Returns a tuple ``(modified_any, warnings)``: a bool telling whether
    at least one file was modified and the total number of warnings
    summed over all files. For a directory without ``*.md`` files the
    result is ``(False, 0)``.
    """
    modified_any = False
    total_warnings = 0
    for file_ in sorted(glob.glob(os.path.join(dir_, "*.md"))):
        (modified, warnings) = toc_file(file_)
        # Accumulate instead of overwriting, otherwise only the last
        # processed file would decide the result.
        modified_any = modified_any or modified
        total_warnings += warnings
    return modified_any, total_warnings
70+
71+
72+
def create_toc(contents, file_):
    """Create, update or remove the TOC in the document contents.

    The new TOC is obtained from ``parse_headings``. When the document
    already contains a TOC it is replaced with the new one (or removed
    when the new TOC is empty). When there is no TOC yet and there is
    something to list, the TOC is inserted right before the first line
    that starts with "##", the first line of the document excluded.

    ``file_`` is used only in the warning message.

    Returns a tuple ``(tocsize, contents, warnings)``: the number of TOC
    entries, the resulting contents and the number of warnings. When a
    TOC would have to be inserted but the document does not start with
    an H1, a warning is printed and ``(0, contents, warnings+1)`` is
    returned with the contents left untouched.
    """
    toc_pattern = (r"Table of contents:%s(\s*\* \[[^\]]+\]\([^)]+\)%s){2,}"
                   % (os.linesep, os.linesep))
    match = re.search(toc_pattern, contents)
    oldtoc = match.group(0) if match else None
    (tocsize, toc, warnings) = parse_headings(contents, file_)
    if oldtoc:
        if not toc:
            # If toc removed need to remove an additional new line
            # that was inserted after toc.
            contents = contents.replace(oldtoc+os.linesep, toc)
        else:
            contents = contents.replace(oldtoc, toc)
    elif tocsize:
        # Insert after H1, but if there is text directly after H1
        # then insert after that text.
        if not re.search(r"^#\s+", contents):
            print("WARNING: missing H1 on first line. Ignoring file: "+file_)
            return 0, contents, warnings+1
        lines = contents.splitlines()
        contents = ""
        toc_inserted = False
        for index, line in enumerate(lines):
            # The first line is never considered as an insertion point.
            if (index > 0 and not toc_inserted
                    and re.search(r"^(##|###)", line)):
                # Drop one blank line preceding the heading, the TOC takes
                # its place. When the preceding line is not blank nothing
                # is dropped, otherwise the TOC label would be glued onto
                # the end of that line.
                if contents.endswith(os.linesep * 2):
                    contents = contents[0:-len(os.linesep)]
                contents += toc + os.linesep + os.linesep
                toc_inserted = True
            contents += line + os.linesep
    return tocsize, contents, warnings
105+
106+
107+
def parse_headings(raw_contents, file_):
    """Build the TOC fragment from the headings found in the contents.

    Fenced code blocks are ignored. H1 headings are never listed; every
    H1 after the first one prints a warning that mentions ``file_`` and
    is counted. H2 and H3 headings become TOC entries, H3 entries are
    indented once any H2 has been seen.

    Returns a tuple ``(tocsize, toc, warnings)``: the number of entries,
    the TOC text and the number of warnings. With fewer than two entries
    no TOC is produced and ``(0, "", warnings)`` is returned.
    """
    # Drop fenced code blocks, lines inside them may start with "#".
    without_code = re.sub(r"```[\s\S]+?```", "", raw_contents)
    found = re.findall(r"^(#|##|###)\s+(.*)", without_code,
                       re.MULTILINE)
    entries = []
    warnings = 0
    h1_seen = 0
    h2_seen = False
    for hashes, text in found:
        # The number of "#" characters is the heading level (1, 2 or 3).
        depth = len(hashes)
        if depth == 1:
            h1_seen += 1
            if h1_seen > 1:
                warnings += 1
                print("WARNING: found more than one H1 in "+file_)
            continue
        if depth == 2:
            h2_seen = True
        title = text.strip()
        # Anchor: lowercase, only a-z, 0-9, "-" and spaces kept, spaces
        # turned into dashes, runs of dashes collapsed, trailing dash cut.
        anchor = re.sub(r"[^a-z0-9- ]+", r"", title.lower())
        anchor = anchor.replace(" ", "-")
        anchor = re.sub(r"[-]+", r"-", anchor)
        anchor = re.sub(r"-$", r"", anchor)
        # If there was no H2 yet then H3 shouldn't have indent.
        prefix = " " * 2 if (depth == 3 and h2_seen) else ""
        entries.append(prefix + "* [%s](#%s)" % (title, anchor) + os.linesep)
    if len(entries) <= 1:
        # If there is only one H2/H3 heading do not create TOC.
        return 0, "", warnings
    toc = "Table of contents:" + os.linesep + "".join(entries)
    return len(entries), toc, warnings
152+
153+
154+
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)