2020# Since this should not require frequent updates, we just store this
2121# out-of-line and check the unicode.rs file into git.
2222
23- import fileinput , re , os , sys , operator
23+ import fileinput , re , os , sys
2424
2525preamble = '''// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT
2626// file at the top-level directory of this distribution and at
@@ -59,7 +59,7 @@ def is_surrogate(n):
5959
6060def fetch (f ):
6161 if not os .path .exists (os .path .basename (f )):
62- os .system ("curl -O http://www.unicode.org/Public/UNIDATA /%s"
62+ os .system ("curl -O http://www.unicode.org/Public/9.0.0/ucd /%s"
6363 % f )
6464
6565 if not os .path .exists (os .path .basename (f )):
@@ -80,7 +80,7 @@ def load_gencats(f):
8080 if is_surrogate (cp ):
8181 continue
8282 if range_start >= 0 :
83- for i in xrange (range_start , cp ):
83+ for i in range (range_start , cp ):
8484 udict [i ] = data ;
8585 range_start = - 1 ;
8686 if data [1 ].endswith (", First>" ):
@@ -150,8 +150,8 @@ def format_table_content(f, content, indent):
150150def load_properties (f , interestingprops ):
151151 fetch (f )
152152 props = {}
153- re1 = re .compile ("^ *([0-9A-F]+) *; *(\w+)" )
154- re2 = re .compile ("^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)" )
153+ re1 = re .compile (r "^ *([0-9A-F]+) *; *(\w+)" )
154+ re2 = re .compile (r "^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)" )
155155
156156 for line in fileinput .input (os .path .basename (f )):
157157 prop = None
@@ -309,7 +309,7 @@ def emit_break_module(f, break_table, break_cats, name):
309309 # download and parse all the data
310310 fetch ("ReadMe.txt" )
311311 with open ("ReadMe.txt" ) as readme :
312- pattern = "for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
312+ pattern = r "for Version (\d+)\.(\d+)\.(\d+) of the Unicode"
313313 unicode_version = re .search (pattern , readme .read ()).groups ()
314314 rf .write ("""
315315/// The version of [Unicode](http://www.unicode.org/)
@@ -342,19 +342,19 @@ def emit_break_module(f, break_table, break_cats, name):
342342 for cat in grapheme_cats :
343343 grapheme_table .extend ([(x , y , cat ) for (x , y ) in grapheme_cats [cat ]])
344344 grapheme_table .sort (key = lambda w : w [0 ])
345- emit_break_module (rf , grapheme_table , grapheme_cats .keys (), "grapheme" )
345+ emit_break_module (rf , grapheme_table , list ( grapheme_cats .keys () ), "grapheme" )
346346 rf .write ("\n " )
347347
348348 word_cats = load_properties ("auxiliary/WordBreakProperty.txt" , [])
349349 word_table = []
350350 for cat in word_cats :
351351 word_table .extend ([(x , y , cat ) for (x , y ) in word_cats [cat ]])
352352 word_table .sort (key = lambda w : w [0 ])
353- emit_break_module (rf , word_table , word_cats .keys (), "word" )
353+ emit_break_module (rf , word_table , list ( word_cats .keys () ), "word" )
354354
355355 sentence_cats = load_properties ("auxiliary/SentenceBreakProperty.txt" , [])
356356 sentence_table = []
357357 for cat in sentence_cats :
358358 sentence_table .extend ([(x , y , cat ) for (x , y ) in sentence_cats [cat ]])
359359 sentence_table .sort (key = lambda w : w [0 ])
360- emit_break_module (rf , sentence_table , sentence_cats .keys (), "sentence" )
360+ emit_break_module (rf , sentence_table , list ( sentence_cats .keys () ), "sentence" )
0 commit comments