|
| 1 | +# Copyright (c) 2016 The CEF Python Authors. All rights reserved. |
| 2 | + |
| 3 | +"""Create Table of contents (TOC) for a single .md file or for a directory. |
| 4 | +
|
| 5 | +Usage: |
| 6 | + toc.py FILE |
| 7 | + toc.py DIR |
| 8 | +""" |
| 9 | + |
| 10 | +import os |
| 11 | +import sys |
| 12 | +import re |
| 13 | +import glob |
| 14 | + |
| 15 | + |
def main():
    """Command-line entry point.

    Print the usage text and exit with status 0 when no argument or a help
    flag is given. Otherwise process the directory or the single file named
    by the first argument and report whether anything was modified and how
    many warnings were returned.
    """
    help_flags = ("-h", "--help", "/?")
    if len(sys.argv) == 1 or any(flag in sys.argv for flag in help_flags):
        print(__doc__.strip())
        sys.exit(0)
    target = sys.argv[1]
    # Pick the handler and the matching "nothing changed" message up front
    # so the reporting code below is shared by both cases.
    if os.path.isdir(target):
        handler = toc_dir
        unchanged_msg = "No changes to TOCs. Files not modified."
    else:
        handler = toc_file
        unchanged_msg = "No changes to TOC. File not modified."
    modified, warnings = handler(target)
    print("Done" if modified else unchanged_msg)
    if warnings:
        print("Warnings: "+str(warnings))
| 39 | + |
| 40 | + |
def toc_file(file_):
    """Create, update or remove the TOC of a single file.

    The file is read as UTF-8, its line endings are normalised to
    os.linesep, and it is rewritten only when create_toc() returns contents
    that differ from the normalised original.

    Return a tuple (modified, warnings): a bool telling whether the file
    was rewritten and the number of warnings returned by create_toc().
    """
    with open(file_, "rb") as fo:
        # Decode explicitly. On Python 3 a binary read returns bytes, which
        # cannot be used with the str regex patterns applied below.
        orig_contents = fo.read().decode("utf-8")
    # Fix new lines just in case. Not using Python's "rU",
    # it is causing strange issues.
    orig_contents = re.sub(r"(\r\n|\r|\n)", os.linesep, orig_contents)
    (tocsize, contents, warnings) = create_toc(orig_contents, file_)
    if contents == orig_contents:
        return False, warnings
    # Binary mode keeps the os.linesep line endings exactly as built above.
    with open(file_, "wb") as fo:
        fo.write(contents.encode("utf-8"))
    tocsize_str = ("TOC size: "+str(tocsize) if tocsize
                   else "TOC removed")
    print("Modified: "+file_+" ("+tocsize_str+")")
    return True, warnings
| 59 | + |
| 60 | + |
def toc_dir(dir_):
    """Process every "*.md" file found directly inside a directory.

    Return a tuple (modified_any, warnings): a bool telling whether at
    least one file was modified and the total number of warnings summed
    over all processed files. A directory without any .md file yields
    (False, 0).
    """
    modified_any = False
    total_warnings = 0
    # Sorted so that files are processed (and reported) in a stable order.
    for file_ in sorted(glob.glob(os.path.join(dir_, "*.md"))):
        (modified, warnings) = toc_file(file_)
        # Accumulate instead of overwriting, otherwise only the last
        # file's result would be reported.
        if modified:
            modified_any = True
        total_warnings += warnings
    return modified_any, total_warnings
| 70 | + |
| 71 | + |
def create_toc(contents, file_):
    """Create, update or remove the TOC in the document contents.

    If the document already contains a TOC it is replaced by the freshly
    generated one, or removed when no TOC is generated any more. Otherwise
    a new TOC is inserted before the first line that starts with "##".
    A document without a TOC whose first line is not an H1 is left
    untouched and counted as a warning.

    Return a tuple (tocsize, contents, warnings): the number of TOC
    entries, the resulting document contents and the number of warnings.
    """
    match = re.search(r"Table of contents:%s(\s*\* \[[^\]]+\]\([^)]+\)%s){2,}"
                      % (os.linesep, os.linesep), contents)
    oldtoc = match.group(0) if match else None
    (tocsize, toc, warnings) = parse_headings(contents, file_)
    if oldtoc:
        if not toc:
            # If toc removed need to remove an additional new line
            # that was inserted after toc.
            contents = contents.replace(oldtoc+os.linesep, toc)
        else:
            contents = contents.replace(oldtoc, toc)
    elif tocsize:
        # Insert after H1, but if there is text directly after H1
        # then insert after that text.
        if not re.search(r"^#\s+", contents):
            print("WARNING: missing H1 on first line. Ignoring file: "+file_)
            return 0, contents, warnings+1
        lines = contents.splitlines()
        # The first line (the H1) is copied as is and is never considered
        # as an insertion point.
        pieces = [lines[0] + os.linesep]
        toc_inserted = False
        for line in lines[1:]:
            if not toc_inserted and re.search(r"^(##|###)", line):
                # Drop one blank line preceding the heading. Only a real
                # blank line is dropped: removing the line separator of a
                # non-blank line would glue the TOC onto that line.
                if pieces[-1] == os.linesep:
                    pieces.pop()
                pieces.append(toc + os.linesep + os.linesep)
                toc_inserted = True
            pieces.append(line + os.linesep)
        contents = "".join(pieces)
    return tocsize, contents, warnings
| 105 | + |
| 106 | + |
def parse_headings(raw_contents, file_):
    """Parse contents looking for headings and build the TOC fragment.

    Fenced code blocks are ignored. H1 headings are never listed; every H1
    after the first one is counted as a warning. H2 and H3 headings become
    TOC entries, and an H3 is indented only when an H2 was seen before it.
    file_ is used only in warning messages.

    Return a tuple (tocsize, toc, warnings): the number of TOC entries,
    the TOC fragment and the number of warnings. When fewer than two
    entries are found no TOC is created and (0, "", warnings) is returned.
    """
    # Remove code blocks
    parsable_contents = re.sub(r"```[\s\S]+?```", "", raw_contents)
    # Parse H1,H2,H3
    headings = re.findall(r"^(#|##|###)\s+(.*)", parsable_contents,
                          re.MULTILINE)
    entries = []
    warnings = 0
    count_h1 = 0
    seen_h2 = False
    for (hashes, raw_title) in headings:
        # The regex only captures one to three "#" characters, so the
        # heading level is simply their count.
        level = len(hashes)
        title = raw_title.strip()
        if level == 1:
            count_h1 += 1
            if count_h1 > 1:
                warnings += 1
                print("WARNING: found more than one H1 in "+file_)
            continue
        if level == 2:
            seen_h2 = True
        # Build the anchor. Underscores are kept because GitHub keeps them
        # in heading anchors; stripping them would produce broken links
        # for headings such as "get_value".
        hash_ = title.lower()
        hash_ = re.sub(r"[^a-z0-9_\- ]+", r"", hash_)
        hash_ = hash_.replace(" ", "-")
        hash_ = re.sub(r"[-]+", r"-", hash_)
        hash_ = re.sub(r"-$", r"", hash_)
        # If there was no H2 yet then H3 shouldn't have indent.
        indent = " " * 2 if (level == 3 and seen_h2) else ""
        entries.append(indent + "* [%s](#%s)" % (title, hash_))
    if len(entries) <= 1:
        # If there is only one H2/H3 heading do not create TOC.
        return 0, "", warnings
    toc = ("Table of contents:" + os.linesep
           + os.linesep.join(entries) + os.linesep)
    return len(entries), toc, warnings
| 152 | + |
| 153 | + |
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
0 commit comments