1616import re
1717import unicodedata
1818
19+
def normalize(f):
    """
    Return filename *f* with its Unicode characters in NFC form.

    OSX and Linux filesystems encode composite characters differently in
    filenames.  We should normalize to NFC: http://unicode.org/reports/tr15/.

    Byte strings are interpreted as UTF-8 and returned as UTF-8 byte
    strings; text strings are returned as text.
    """
    if isinstance(f, bytes):
        # On Python 2 `bytes` is `str`, so this branch keeps the original
        # decode -> normalize -> re-encode behaviour.
        return unicodedata.normalize('NFC', f.decode('utf-8')).encode('utf-8')
    # Already decoded text: normalize directly, no encoding round-trip needed.
    return unicodedata.normalize('NFC', f)
27+
1928os .chdir ("src" )
2029rulesets_fn = "chrome/content/rules/default.rulesets"
21- xml_ruleset_files = glob ("chrome/content/rules/*.xml" )
30+ xml_ruleset_files = map ( normalize , glob ("chrome/content/rules/*.xml" ) )
2231
2332# cleanup after bugs :/
2433misfile = rulesets_fn + "r"
@@ -53,12 +62,6 @@ def clean_up(rulefile):
5362 rulefile = re .sub (r"\s*(/>|<ruleset)" , r"\1" , rulefile )
5463 return rulefile
5564
56- def normalize (f ):
57- """Turn OSX's weird unicode decomposition for filenames into something normal"""
58- if os .uname ()[0 ] == 'Darwin' :
59- f = unicodedata .normalize ('NFC' , unicode (f , 'utf-8' )).encode ('utf-8' )
60- return f
61-
6265library = open (rulesets_fn ,"w" )
6366
6467try :
@@ -73,7 +76,7 @@ def normalize(f):
7376
for rfile in sorted(xml_ruleset_files):
    # Read inside a `with` block so each ruleset file is closed right away
    # instead of leaking one open handle per file.
    with open(rfile) as rule_fh:
        ruleset = rule_fh.read()
    # Stamp the first <ruleset tag with the file's basename.
    fn = os.path.basename(rfile)
    ruleset = ruleset.replace("<ruleset", '<ruleset f="%s"' % fn, 1)
    library.write(clean_up(ruleset))
7982library .write ("</rulesetlibrary>\n " )
0 commit comments