Skip to content

Commit fe15da6

Browse files
mpvlrsc
authored and committed
unicode: upgrade to 8.0.0
Not sure if I'm on time for 1.5; Unicode 8 just got released. Straightforward upgrade. Only changed maketables.go to prevent it from adding the Cherokee upper and lower case mappings. This change causes the caseOrbit table to NOT change. Added tests to verify that the relevant functions still produce the correct result, even for Cherokee. Fixes golang#11309 Change-Id: I42850f5b3399bde125b002efc78eff96dbd86a08 Reviewed-on: https://go-review.googlesource.com/11286 Reviewed-by: Russ Cox <rsc@golang.org>
1 parent 834fef8 commit fe15da6

File tree

7 files changed

+437
-215
lines changed

7 files changed

+437
-215
lines changed

api/except.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,3 +328,4 @@ pkg syscall (netbsd-arm), type IfMsghdr struct, Pad_cgo_1 [4]uint8
328328
pkg syscall (netbsd-arm-cgo), const SizeofIfData = 132
329329
pkg syscall (netbsd-arm-cgo), type IfMsghdr struct, Pad_cgo_1 [4]uint8
330330
pkg unicode, const Version = "6.3.0"
331+
pkg unicode, const Version = "7.0.0"

api/next.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -948,3 +948,10 @@ pkg syscall (openbsd-amd64-cgo), type SysProcAttr struct, Pgid int
948948
pkg text/template, method (*Template) DefinedTemplates() string
949949
pkg text/template, method (*Template) Option(...string) *Template
950950
pkg time, method (Time) AppendFormat([]uint8, string) []uint8
951+
pkg unicode, const Version = "8.0.0"
952+
pkg unicode, var Ahom *RangeTable
953+
pkg unicode, var Anatolian_Hieroglyphs *RangeTable
954+
pkg unicode, var Hatran *RangeTable
955+
pkg unicode, var Multani *RangeTable
956+
pkg unicode, var Old_Hungarian *RangeTable
957+
pkg unicode, var SignWriting *RangeTable

src/strconv/isprint.go

Lines changed: 51 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
package strconv
99

10-
// (468+138+67)*2 + (326)*4 = 2650 bytes
10+
// (470+136+73)*2 + (342)*4 = 2726 bytes
1111

1212
var isPrint16 = []uint16{
1313
0x0020, 0x007e,
@@ -26,8 +26,8 @@ var isPrint16 = []uint16{
2626
0x0800, 0x082d,
2727
0x0830, 0x085b,
2828
0x085e, 0x085e,
29-
0x08a0, 0x08b2,
30-
0x08e4, 0x098c,
29+
0x08a0, 0x08b4,
30+
0x08e3, 0x098c,
3131
0x098f, 0x0990,
3232
0x0993, 0x09b2,
3333
0x09b6, 0x09b9,
@@ -51,6 +51,7 @@ var isPrint16 = []uint16{
5151
0x0ad0, 0x0ad0,
5252
0x0ae0, 0x0ae3,
5353
0x0ae6, 0x0af1,
54+
0x0af9, 0x0af9,
5455
0x0b01, 0x0b0c,
5556
0x0b0f, 0x0b10,
5657
0x0b13, 0x0b39,
@@ -73,7 +74,7 @@ var isPrint16 = []uint16{
7374
0x0be6, 0x0bfa,
7475
0x0c00, 0x0c39,
7576
0x0c3d, 0x0c4d,
76-
0x0c55, 0x0c59,
77+
0x0c55, 0x0c5a,
7778
0x0c60, 0x0c63,
7879
0x0c66, 0x0c6f,
7980
0x0c78, 0x0cb9,
@@ -84,7 +85,7 @@ var isPrint16 = []uint16{
8485
0x0d01, 0x0d3a,
8586
0x0d3d, 0x0d4e,
8687
0x0d57, 0x0d57,
87-
0x0d60, 0x0d63,
88+
0x0d5f, 0x0d63,
8889
0x0d66, 0x0d75,
8990
0x0d79, 0x0d7f,
9091
0x0d82, 0x0d96,
@@ -117,7 +118,8 @@ var isPrint16 = []uint16{
117118
0x1318, 0x135a,
118119
0x135d, 0x137c,
119120
0x1380, 0x1399,
120-
0x13a0, 0x13f4,
121+
0x13a0, 0x13f5,
122+
0x13f8, 0x13fd,
121123
0x1400, 0x169c,
122124
0x16a0, 0x16f8,
123125
0x1700, 0x1714,
@@ -167,16 +169,17 @@ var isPrint16 = []uint16{
167169
0x2030, 0x205e,
168170
0x2070, 0x2071,
169171
0x2074, 0x209c,
170-
0x20a0, 0x20bd,
172+
0x20a0, 0x20be,
171173
0x20d0, 0x20f0,
172-
0x2100, 0x2189,
174+
0x2100, 0x218b,
173175
0x2190, 0x23fa,
174176
0x2400, 0x2426,
175177
0x2440, 0x244a,
176178
0x2460, 0x2b73,
177179
0x2b76, 0x2b95,
178180
0x2b98, 0x2bb9,
179181
0x2bbd, 0x2bd1,
182+
0x2bec, 0x2bef,
180183
0x2c00, 0x2cf3,
181184
0x2cf9, 0x2d27,
182185
0x2d2d, 0x2d2d,
@@ -193,19 +196,19 @@ var isPrint16 = []uint16{
193196
0x3131, 0x31ba,
194197
0x31c0, 0x31e3,
195198
0x31f0, 0x4db5,
196-
0x4dc0, 0x9fcc,
199+
0x4dc0, 0x9fd5,
197200
0xa000, 0xa48c,
198201
0xa490, 0xa4c6,
199202
0xa4d0, 0xa62b,
200203
0xa640, 0xa6f7,
201204
0xa700, 0xa7ad,
202-
0xa7b0, 0xa7b1,
205+
0xa7b0, 0xa7b7,
203206
0xa7f7, 0xa82b,
204207
0xa830, 0xa839,
205208
0xa840, 0xa877,
206209
0xa880, 0xa8c4,
207210
0xa8ce, 0xa8d9,
208-
0xa8e0, 0xa8fb,
211+
0xa8e0, 0xa8fd,
209212
0xa900, 0xa953,
210213
0xa95f, 0xa97c,
211214
0xa980, 0xa9d9,
@@ -217,9 +220,8 @@ var isPrint16 = []uint16{
217220
0xab01, 0xab06,
218221
0xab09, 0xab0e,
219222
0xab11, 0xab16,
220-
0xab20, 0xab5f,
221-
0xab64, 0xab65,
222-
0xabc0, 0xabed,
223+
0xab20, 0xab65,
224+
0xab70, 0xabed,
223225
0xabf0, 0xabf9,
224226
0xac00, 0xd7a3,
225227
0xd7b0, 0xd7c6,
@@ -234,8 +236,7 @@ var isPrint16 = []uint16{
234236
0xfd92, 0xfdc7,
235237
0xfdf0, 0xfdfd,
236238
0xfe00, 0xfe19,
237-
0xfe20, 0xfe2d,
238-
0xfe30, 0xfe6b,
239+
0xfe20, 0xfe6b,
239240
0xfe70, 0xfefc,
240241
0xff01, 0xffbe,
241242
0xffc2, 0xffc7,
@@ -370,8 +371,6 @@ var isNotPrint16 = []uint16{
370371
0x318f,
371372
0x321f,
372373
0x32ff,
373-
0xa69e,
374-
0xa78f,
375374
0xa9ce,
376375
0xa9ff,
377376
0xab27,
@@ -418,12 +417,13 @@ var isPrint32 = []uint32{
418417
0x01083c, 0x01083c,
419418
0x01083f, 0x01089e,
420419
0x0108a7, 0x0108af,
421-
0x010900, 0x01091b,
420+
0x0108e0, 0x0108f5,
421+
0x0108fb, 0x01091b,
422422
0x01091f, 0x010939,
423423
0x01093f, 0x01093f,
424424
0x010980, 0x0109b7,
425-
0x0109be, 0x0109bf,
426-
0x010a00, 0x010a06,
425+
0x0109bc, 0x0109cf,
426+
0x0109d2, 0x010a06,
427427
0x010a0c, 0x010a33,
428428
0x010a38, 0x010a3a,
429429
0x010a3f, 0x010a47,
@@ -438,6 +438,9 @@ var isPrint32 = []uint32{
438438
0x010b99, 0x010b9c,
439439
0x010ba9, 0x010baf,
440440
0x010c00, 0x010c48,
441+
0x010c80, 0x010cb2,
442+
0x010cc0, 0x010cf2,
443+
0x010cfa, 0x010cff,
441444
0x010e60, 0x010e7e,
442445
0x011000, 0x01104d,
443446
0x011052, 0x01106f,
@@ -446,37 +449,42 @@ var isPrint32 = []uint32{
446449
0x0110f0, 0x0110f9,
447450
0x011100, 0x011143,
448451
0x011150, 0x011176,
449-
0x011180, 0x0111c8,
450-
0x0111cd, 0x0111cd,
451-
0x0111d0, 0x0111da,
452-
0x0111e1, 0x0111f4,
452+
0x011180, 0x0111cd,
453+
0x0111d0, 0x0111f4,
453454
0x011200, 0x01123d,
455+
0x011280, 0x0112a9,
454456
0x0112b0, 0x0112ea,
455457
0x0112f0, 0x0112f9,
456-
0x011301, 0x01130c,
458+
0x011300, 0x01130c,
457459
0x01130f, 0x011310,
458460
0x011313, 0x011339,
459461
0x01133c, 0x011344,
460462
0x011347, 0x011348,
461463
0x01134b, 0x01134d,
464+
0x011350, 0x011350,
462465
0x011357, 0x011357,
463466
0x01135d, 0x011363,
464467
0x011366, 0x01136c,
465468
0x011370, 0x011374,
466469
0x011480, 0x0114c7,
467470
0x0114d0, 0x0114d9,
468471
0x011580, 0x0115b5,
469-
0x0115b8, 0x0115c9,
472+
0x0115b8, 0x0115dd,
470473
0x011600, 0x011644,
471474
0x011650, 0x011659,
472475
0x011680, 0x0116b7,
473476
0x0116c0, 0x0116c9,
477+
0x011700, 0x011719,
478+
0x01171d, 0x01172b,
479+
0x011730, 0x01173f,
474480
0x0118a0, 0x0118f2,
475481
0x0118ff, 0x0118ff,
476482
0x011ac0, 0x011af8,
477-
0x012000, 0x012398,
483+
0x012000, 0x012399,
478484
0x012400, 0x012474,
485+
0x012480, 0x012543,
479486
0x013000, 0x01342e,
487+
0x014400, 0x014646,
480488
0x016800, 0x016a38,
481489
0x016a40, 0x016a69,
482490
0x016a6e, 0x016a6f,
@@ -497,7 +505,7 @@ var isPrint32 = []uint32{
497505
0x01d000, 0x01d0f5,
498506
0x01d100, 0x01d126,
499507
0x01d129, 0x01d172,
500-
0x01d17b, 0x01d1dd,
508+
0x01d17b, 0x01d1e8,
501509
0x01d200, 0x01d245,
502510
0x01d300, 0x01d356,
503511
0x01d360, 0x01d371,
@@ -508,7 +516,8 @@ var isPrint32 = []uint32{
508516
0x01d50d, 0x01d546,
509517
0x01d54a, 0x01d6a5,
510518
0x01d6a8, 0x01d7cb,
511-
0x01d7ce, 0x01d7ff,
519+
0x01d7ce, 0x01da8b,
520+
0x01da9b, 0x01daaf,
512521
0x01e800, 0x01e8c4,
513522
0x01e8c7, 0x01e8d6,
514523
0x01ee00, 0x01ee24,
@@ -530,13 +539,7 @@ var isPrint32 = []uint32{
530539
0x01f210, 0x01f23a,
531540
0x01f240, 0x01f248,
532541
0x01f250, 0x01f251,
533-
0x01f300, 0x01f32c,
534-
0x01f330, 0x01f37d,
535-
0x01f380, 0x01f3ce,
536-
0x01f3d4, 0x01f3f7,
537-
0x01f400, 0x01f54a,
538-
0x01f550, 0x01f642,
539-
0x01f645, 0x01f6cf,
542+
0x01f300, 0x01f6d0,
540543
0x01f6e0, 0x01f6ec,
541544
0x01f6f0, 0x01f6f3,
542545
0x01f700, 0x01f773,
@@ -546,9 +549,13 @@ var isPrint32 = []uint32{
546549
0x01f850, 0x01f859,
547550
0x01f860, 0x01f887,
548551
0x01f890, 0x01f8ad,
552+
0x01f910, 0x01f918,
553+
0x01f980, 0x01f984,
554+
0x01f9c0, 0x01f9c0,
549555
0x020000, 0x02a6d6,
550556
0x02a700, 0x02b734,
551557
0x02b740, 0x02b81d,
558+
0x02b820, 0x02cea1,
552559
0x02f800, 0x02fa1d,
553560
0x0e0100, 0x0e01ef,
554561
}
@@ -562,12 +569,18 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
562569
0x0809,
563570
0x0836,
564571
0x0856,
572+
0x08f3,
565573
0x0a04,
566574
0x0a14,
567575
0x0a18,
568576
0x10bd,
569577
0x1135,
578+
0x11e0,
570579
0x1212,
580+
0x1287,
581+
0x1289,
582+
0x128e,
583+
0x129e,
571584
0x1304,
572585
0x1329,
573586
0x1331,
@@ -589,6 +602,7 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
589602
0xd53f,
590603
0xd545,
591604
0xd551,
605+
0xdaa0,
592606
0xee04,
593607
0xee20,
594608
0xee23,
@@ -618,7 +632,6 @@ var isNotPrint32 = []uint16{ // add 0x10000 to each entry
618632
0xf0c0,
619633
0xf0d0,
620634
0xf12f,
621-
0xf4ff,
622635
0xf57a,
623636
0xf5a4,
624637
}

src/unicode/letter_test.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ var upperTest = []rune{
2424
0x181,
2525
0x376,
2626
0x3cf,
27+
0x13bd,
2728
0x1f2a,
2829
0x2102,
2930
0x2c00,
@@ -46,6 +47,7 @@ var notupperTest = []rune{
4647
0x377,
4748
0x387,
4849
0x2150,
50+
0xab7d,
4951
0xffff,
5052
0x10000,
5153
}
@@ -194,6 +196,15 @@ var caseTest = []caseT{
194196
{LowerCase, 0x0148, 0x0148},
195197
{TitleCase, 0x0148, 0x0147},
196198

199+
// Lowercase lower than uppercase.
200+
// AB78;CHEROKEE SMALL LETTER GE;Ll;0;L;;;;;N;;;13A8;;13A8
201+
{UpperCase, 0xab78, 0x13a8},
202+
{LowerCase, 0xab78, 0xab78},
203+
{TitleCase, 0xab78, 0x13a8},
204+
{UpperCase, 0x13a8, 0x13a8},
205+
{LowerCase, 0x13a8, 0xab78},
206+
{TitleCase, 0x13a8, 0x13a8},
207+
197208
// Last block in the 5.1.0 table
198209
// 10400;DESERET CAPITAL LETTER LONG I;Lu;0;L;;;;;N;;;;10428;
199210
{UpperCase, 0x10400, 0x10400},
@@ -405,6 +416,9 @@ var simpleFoldTests = []string{
405416
// Extra special cases: has lower/upper but no case fold.
406417
"İ",
407418
"ı",
419+
420+
// Upper comes before lower (Cherokee).
421+
"\u13b0\uab80",
408422
}
409423

410424
func TestSimpleFold(t *testing.T) {

src/unicode/maketables.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ func main() {
4444
var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
4545
var casefoldingURL = flag.String("casefolding", "", "full URL for CaseFolding.txt; defaults to --url/CaseFolding.txt")
4646
var url = flag.String("url",
47-
"http://www.unicode.org/Public/7.0.0/ucd/",
47+
"http://www.unicode.org/Public/8.0.0/ucd/",
4848
"URL of Unicode database directory")
4949
var tablelist = flag.String("tables",
5050
"all",
@@ -1152,11 +1152,14 @@ func printCasefold() {
11521152
}
11531153
}
11541154

1155-
// Delete the groups for which assuming [lower, upper] is right.
1155+
// Delete the groups for which assuming [lower, upper] or [upper, lower] is right.
11561156
for i, orb := range caseOrbit {
11571157
if len(orb) == 2 && chars[orb[0]].upperCase == orb[1] && chars[orb[1]].lowerCase == orb[0] {
11581158
caseOrbit[i] = nil
11591159
}
1160+
if len(orb) == 2 && chars[orb[1]].upperCase == orb[0] && chars[orb[0]].lowerCase == orb[1] {
1161+
caseOrbit[i] = nil
1162+
}
11601163
}
11611164

11621165
// Record orbit information in chars.

0 commit comments

Comments
 (0)