Skip to content

Commit 5945f9c

Browse files
committed
Preprocess: Refactor renaming due to invalid characters
1 parent 41becd3 commit 5945f9c

File tree

1 file changed

+30
-20
lines changed

1 file changed

+30
-20
lines changed

preprocess.py

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -77,37 +77,44 @@ def rearrange_archive(root):
7777
# remove what's left
7878
shutil.rmtree(path)
7979

80-
# strip query strings from filenames to support Windows filesystems.
8180
def add_file_to_rename_map(rename_map, dir, fn, new_fn):
8281
path = os.path.join(dir, fn)
8382
if not os.path.isfile(path):
84-
print("Not renaming " + path)
83+
print("ERROR: Not renaming '{0}' because path does not exist".format(path))
8584
return
8685
rename_map.append((dir, fn, new_fn))
8786

8887
def find_files_to_be_renamed(root):
89-
# returns a rename map: array of tuples each of which contain three strings:
90-
# the directory the file resides in, the source and destination filenames
88+
# Returns a rename map: an array of tuples, each of which contains three strings:
89+
# the directory the file resides in, the source and destination filenames.
90+
91+
# The rename map specifies files to be renamed in order to support them on
92+
# Windows filesystems, which don't support certain characters in file names.
9193
rename_map = []
9294

93-
files_rename_qs = [] # remove query string
94-
files_rename_quot = [] # remove quotes
95-
files_loader = [] # files served by load.php
95+
files_rename = [] # general files to be renamed
96+
files_loader = [] # files served by load.php. These should map to
97+
# consistent and short file names because we
98+
# modify some of them later in the pipeline
99+
96100
for dir, dirnames, filenames in os.walk(root):
97-
for filename in fnmatch.filter(filenames, '*[?]*'):
98-
files_rename_qs.append((dir, filename))
99-
for filename in fnmatch.filter(filenames, '*"*'):
100-
files_rename_quot.append((dir, filename))
101-
for filename in fnmatch.filter(filenames, 'load.php[?]*'):
102-
files_loader.append((dir, filename))
101+
filenames_loader = set(fnmatch.filter(filenames, 'load.php[?]*'))
102+
# match any filename containing a '?' or '"' character
103+
filenames_rename = set(fnmatch.filter(filenames, '*[?"]*'))
103104

104-
for dir,fn in files_loader:
105-
files_rename_qs.remove((dir, fn))
105+
# don't process load.php files in general rename handler
106+
filenames_rename -= filenames_loader
107+
108+
for fn in filenames_loader:
109+
files_loader.append((dir, fn))
110+
for fn in filenames_rename:
111+
files_rename.append((dir, fn))
106112

107-
for dir,fn in files_rename_qs:
108-
add_file_to_rename_map(rename_map, dir, fn, re.sub('\?.*', '', fn))
109-
for dir,fn in files_rename_quot:
110-
add_file_to_rename_map(rename_map, dir, fn, re.sub('"', '_q_', fn))
113+
for dir,orig_fn in files_rename:
114+
fn = orig_fn
115+
fn = re.sub('\?.*', '', fn)
116+
fn = re.sub('"', '_q_', fn)
117+
add_file_to_rename_map(rename_map, dir, orig_fn, fn)
111118

112119
# map loader names to more recognizable names
113120
for dir,fn in files_loader:
@@ -135,7 +142,10 @@ def find_files_to_be_renamed(root):
135142

136143
def rename_files(rename_map):
137144
for dir, old_fn, new_fn in rename_map:
138-
shutil.move(os.path.join(dir, old_fn), os.path.join(dir, new_fn))
145+
src_path = os.path.join(dir, old_fn)
146+
dst_path = os.path.join(dir, new_fn)
147+
print("Renaming '{0}' to \n '{1}'".format(src_path, dst_path))
148+
shutil.move(src_path, dst_path)
139149

140150
def find_html_files(root):
141151
# find files that need to be preprocessed

0 commit comments

Comments
 (0)