@@ -81,15 +81,7 @@ def rearrange_archive(root):
8181 for fn in fnmatch .filter (os .listdir (root ), 'cppreference-export*.xml' ):
8282 os .remove (os .path .join (root , fn ))
8383
84- def add_file_to_rename_map (rename_map , dir , fn , new_fn ):
85- path = os .path .join (dir , fn )
86- if not os .path .isfile (path ):
87- print ("ERROR: Not renaming '{0}' because path does not exist" .format (path ))
88- return
89- rename_map .append ((dir , fn , new_fn ))
90-
91- # Converts complex URL to resources supplied by MediaWiki loader to a simplified
92- # name
84+ # Converts complex URL to resources supplied by MediaWiki loader to a simplified name
9385def convert_loader_name (fn ):
9486 if "modules=site&only=scripts" in fn :
9587 return "site_scripts.js"
@@ -105,55 +97,36 @@ def convert_loader_name(fn):
10597 raise Exception ('Loader file {0} does not match any known files' .format (fn ))
10698
def find_files_to_be_renamed(root):
    """Build the rename map for all file names found below *root*.

    The result is a dict mapping an old file name (no directory part) to its
    new file name. Because the map is keyed by bare file name, an entry
    applies to every file of that name, whichever directory it lives in.

    Three kinds of entries are produced:

    * names matching ``load.php?...`` are mapped through
      ``convert_loader_name``;
    * any other name containing ``?``, ``*`` or ``"`` has everything from the
      first ``?`` onwards removed, ``"`` replaced by ``_q_`` and ``*``
      replaced by ``_star_``;
    * ``NAN.html`` is always mapped to ``NAN.2.html``.
    """
    loader_pattern = re.compile(r'load\.php\?.*')
    query_pattern = re.compile(r'\?.*')

    # Collect every distinct file name in the tree; directories are ignored.
    unique_names = set()
    for _, _, filenames in os.walk(root):
        unique_names.update(filenames)

    # find files with invalid names -> rename all occurrences
    rename_map = {}
    for name in unique_names:
        if loader_pattern.match(name):
            rename_map[name] = convert_loader_name(name)
            continue

        if '?' in name or '*' in name or '"' in name:
            stripped = query_pattern.sub('', name)
            rename_map[name] = stripped.replace('"', '_q_').replace('*', '_star_')

    # rename files that conflict on case-insensitive filesystems
    # TODO perform this automatically
    rename_map['NAN.html'] = 'NAN.2.html'

    return rename_map
121+
def rename_files(root, rename_map):
    """Apply *rename_map* to every file below *root*.

    *rename_map* maps an old file name (no directory part) to a new one.
    Each file whose name is a key of the map is moved to the new name inside
    the same directory; all other files are left untouched. A message is
    printed for every rename performed.
    """
    for directory, _, filenames in os.walk(root):
        for old_fn in filenames:
            new_fn = rename_map.get(old_fn)
            if new_fn is None:
                # no entry for this name: nothing to do
                continue

            src_path = os.path.join(directory, old_fn)
            dst_path = os.path.join(directory, new_fn)
            print("Renaming '{0}' to \n '{1}'".format(src_path, dst_path))
            shutil.move(src_path, dst_path)
157130
158131def find_html_files (root ):
159132 # find files that need to be preprocessed
@@ -199,26 +172,24 @@ def transform_ranges_placeholder(target, file, root):
199172 return os .path .relpath (abstarget , os .path .dirname (file ))
200173
def is_external_link(target):
    """Return True if *target* carries a URL scheme or a network location.

    The link is parsed with ``urllib.parse.urlparse``. Anything with a
    non-empty scheme (``http://...``, ``mailto:...``) or a non-empty host
    part (protocol-relative ``//host/path``) counts as external; plain
    relative paths and bare fragments do not.
    """
    parts = urllib.parse.urlparse(target)
    has_no_scheme = parts.scheme == ''
    has_no_host = parts.netloc == ''
    return not (has_no_scheme and has_no_host)
211177
def trasform_relative_link(rename_map, target):
    """Rewrite a relative link so that it points at the renamed local file.

    rename_map -- dict mapping an old file name (no directory part) to the
                  new file name.
    target     -- the relative link to transform.

    The link is parsed as a URL. Its scheme and host are discarded, its query
    string is dropped, and its fragment is passed through unchanged. The path
    is percent-decoded, the ``mwiki`` prefixes are redirected to
    ``../common/``, the last path component is looked up in *rename_map*, and
    the result is percent-encoded again.

    Returns the transformed link as a string. Raises AssertionError if the
    link carries URL parameters (``;...`` on the last path segment).
    """
    # urllib.parse tuple is (scheme, host, path, params, query, fragment)
    _, _, path, params, _, fragment = urllib.parse.urlparse(target)
    assert params == '', \
        "unexpected URL parameters in link '{0}'".format(target)

    path = urllib.parse.unquote(path)
    path = path.replace('../../upload.cppreference.com/mwiki/', '../common/')
    path = path.replace('../mwiki/', '../common/')

    # URL paths always use '/' as separator, whatever the host platform is,
    # so split off the last component textually rather than through os.path
    # (which would introduce backslashes on Windows).
    head, sep, fn = path.rpartition('/')
    path = head + sep + rename_map.get(fn, fn)

    path = urllib.parse.quote(path)
    return urllib.parse.urlunparse(('', '', path, params, '', fragment))
222193
223194# Transforms a link in the given file according to rename map.
224195# target is the link to transform.
0 commit comments