Skip to content

Commit fb45ad8

Browse files
committed
Normalize unicode before sorting
1 parent 7c0f371 commit fb45ad8

File tree

1 file changed

+11
-8
lines changed

1 file changed

+11
-8
lines changed

utils/merge-rulesets.py

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -16,9 +16,18 @@
1616
import re
1717
import unicodedata
1818

19+
20+
def normalize(f):
21+
"""
22+
OSX and Linux filesystems encode composite characters differently in filenames.
23+
We should normalize to NFC: http://unicode.org/reports/tr15/.
24+
"""
25+
f = unicodedata.normalize('NFC', unicode(f, 'utf-8')).encode('utf-8')
26+
return f
27+
1928
os.chdir("src")
2029
rulesets_fn="chrome/content/rules/default.rulesets"
21-
xml_ruleset_files = glob("chrome/content/rules/*.xml")
30+
xml_ruleset_files = map(normalize, glob("chrome/content/rules/*.xml"))
2231

2332
# cleanup after bugs :/
2433
misfile = rulesets_fn + "r"
@@ -53,12 +62,6 @@ def clean_up(rulefile):
5362
rulefile = re.sub(r"\s*(/>|<ruleset)", r"\1", rulefile)
5463
return rulefile
5564

56-
def normalize(f):
57-
"""Turn OSX's weird unicode decomposition for filenames into something normal"""
58-
if os.uname()[0] == 'Darwin':
59-
f = unicodedata.normalize('NFC', unicode(f, 'utf-8')).encode('utf-8')
60-
return f
61-
6265
library = open(rulesets_fn,"w")
6366

6467
try:
@@ -73,7 +76,7 @@ def normalize(f):
7376

7477
for rfile in sorted(xml_ruleset_files):
7578
ruleset = open(rfile).read()
76-
fn=normalize(os.path.basename(rfile))
79+
fn = os.path.basename(rfile)
7780
ruleset = ruleset.replace("<ruleset", '<ruleset f="%s"' % fn, 1)
7881
library.write(clean_up(ruleset))
7982
library.write("</rulesetlibrary>\n")

0 commit comments

Comments
 (0)