3838
3939 [some website](http://github.com/)
4040 [another website](https://github.com/)
41- [local header ](#some-header )
41+ [local link ](#some-title )
4242"""
4343
4444import os
4747import common
4848
4949
50- def check (filepath , target ):
50+ def check (this_file , target , title , titledict ):
5151 """Check if a link's target is like it should be.
5252
5353 Return an error message string or "ok".
@@ -57,45 +57,99 @@ def check(filepath, target):
5757 # be added later.
5858 return "ok"
5959
60- if '#' in target :
61- where = target .index ('#' )
62- if where == 0 :
63- # It's a link to a title in the same file, we need to skip it.
64- return "ok"
65- target = target [:where ]
60+ path = posixpath .join (posixpath .dirname (this_file ), target )
61+ path = posixpath .normpath (path )
62+ real_path = common .slashfix (path )
6663
67- path = posixpath .join (posixpath .dirname (filepath ), target )
68- realpath = common .slashfix (path )
69- if not os .path .exists (realpath ):
64+ if not os .path .exists (real_path ):
7065 return "doesn't exist"
66+
7167 if target .endswith ('/' ):
7268 # A directory.
73- if os .path .isdir (realpath ):
74- return "ok"
75- return "not a directory"
69+ if not os .path .isdir (real_path ):
70+ return "not a directory"
7671 else :
7772 # A file.
78- if os .path .isfile (realpath ):
79- return "ok"
80- return "not a file"
73+ if not os .path .isfile (real_path ):
74+ return "not a file"
75+
76+ if title is not None and title not in titledict [path ]:
77+ return "no title named %s" % title
78+ return "ok"
79+
80+
def find_titles(filename):
    """Read titles of a markdown file and return a list of them.

    Every line that starts with '#' and is outside a fenced code block
    is treated as a title.  The leading '#' characters and surrounding
    whitespace are stripped and the rest is passed through
    common.header_link(); the returned list contains those results in
    file order.

    A fenced code block starts at a line beginning with ``` and ends at
    a line that is exactly ``` (ignoring trailing whitespace).  If the
    closing fence is missing, the rest of the file is treated as code.
    """
    result = []
    in_code_block = False

    with common.slashfix_open(filename, 'r') as f:
        for line in f:
            if in_code_block:
                # Skip everything inside the block so that comments in
                # code are not detected as titles.  Tracking state here
                # (instead of calling f.readline() in an inner loop)
                # means an unterminated block simply runs to the end of
                # the file instead of looping forever at EOF.
                if line.rstrip() == '```':
                    in_code_block = False
                continue

            if line.startswith('```'):
                in_code_block = True
                continue

            if line.startswith('#'):
                # found a title
                result.append(common.header_link(line.lstrip('#').strip()))

    return result
97+
98+
def find_links(this_file):
    """Collect the links found in a markdown file.

    Return a list of (file, title, lineno) tuples, one per link yielded
    by common.find_links().  The link target is split at its first '#':
    the part before it is the file and the part after it is the title.
    A target without '#' gets None as its title, and a target that
    starts with '#' (a link within the same file, like [blabla](#hi))
    gets the basename of this_file as its file.
    """
    links = []

    with common.slashfix_open(this_file, 'r') as f:
        for match, lineno in common.find_links(f):
            linked_file, hash_sign, title = match.group(2).partition('#')
            if not hash_sign:
                # plain link, there is no title part at all
                title = None
            elif not linked_file:
                # link to this file, [blabla](#hi)
                linked_file = posixpath.basename(this_file)
            links.append((linked_file, title, lineno))

    return links
121+
122+
def get_line(filename, lineno):
    """Return line number lineno of a file, counting from 1.

    The line is returned as read from the file.  ValueError is raised
    if the end of the file is reached without finding the line.
    """
    remaining = lineno
    with common.slashfix_open(filename, 'r') as f:
        for line in f:
            # Count down; the wanted line is the one that brings the
            # counter to exactly zero.
            remaining -= 1
            if remaining == 0:
                return line
    raise ValueError("%s is less than %d lines long" % (filename, lineno))
81130
82131
def main():
    """Check every link in every markdown file and print a report.

    The work is done in two passes: first the titles and links of all
    files returned by common.get_markdown_files() are collected, then
    each link is passed to check().  Collecting everything first means
    that the titles of every file are known before any link is checked.
    """
    print("Searching for titles and links...")
    titledict = {}    # {filename: [title1, title2, ...]}
    linkdict = {}     # {filename: [(file, title, lineno), ...]}
    for markdown_file in common.get_markdown_files():
        titledict[markdown_file] = find_titles(markdown_file)
        linkdict[markdown_file] = find_links(markdown_file)

    print("Checking the links...")
    broken = 0
    total = 0

    for filename, linklist in linkdict.items():
        total += len(linklist)
        for target, title, lineno in linklist:
            status = check(filename, target, title, titledict)
            if status == "ok":
                continue
            broken += 1
            print(" file %s, line %d: %s" % (filename, lineno, status))
            print(" %s" % get_line(filename, lineno))

    print("%d/%d links seem to be broken." % (broken, total))
100154
101155
0 commit comments