@@ -15,80 +15,106 @@ class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
1515 # invalid bytes
1616 (b"abc\x81 \x00 \x81 \x00 \x82 \x84 " , "strict" , None ),
1717 (b"abc\xf8 " , "strict" , None ),
18- (b"abc\x81 \x00 \x82 \x84 " , "replace" , "abc\ufffd \uff44 " ),
19- (b"abc\x81 \x00 \x82 \x84 \x88 " , "replace" , "abc\ufffd \uff44 \ufffd " ),
20- (b"abc\x81 \x00 \x82 \x84 " , "ignore" , "abc\uff44 " ),
18+ (b"abc\x81 \x00 \x82 \x84 " , "replace" , "abc\ufffd \x00 \uff44 " ),
19+ (b"abc\x81 \x00 \x82 \x84 \x88 " , "replace" , "abc\ufffd \x00 \uff44 \ufffd " ),
20+ (b"abc\x81 \x00 \x82 \x84 " , "ignore" , "abc\x00 \uff44 " ),
21+ (b"ab\xEB xy" , "replace" , "ab\uFFFD xy" ),
22+ (b"ab\xF0 \x39 xy" , "replace" , "ab\uFFFD 9xy" ),
23+ (b"ab\xEA \xF0 xy" , "replace" , 'ab\ufffd \ue038 y' ),
2124 # sjis vs cp932
2225 (b"\\ \x7e " , "replace" , "\\ \x7e " ),
2326 (b"\x81 \x5f \x81 \x61 \x81 \x7c " , "replace" , "\uff3c \u2225 \uff0d " ),
2427 )
2528
29+ euc_commontests = (
30+ # invalid bytes
31+ (b"abc\x80 \x80 \xc1 \xc4 " , "strict" , None ),
32+ (b"abc\x80 \x80 \xc1 \xc4 " , "replace" , "abc\ufffd \ufffd \u7956 " ),
33+ (b"abc\x80 \x80 \xc1 \xc4 \xc8 " , "replace" , "abc\ufffd \ufffd \u7956 \ufffd " ),
34+ (b"abc\x80 \x80 \xc1 \xc4 " , "ignore" , "abc\u7956 " ),
35+ (b"abc\xc8 " , "strict" , None ),
36+ (b"abc\x8f \x83 \x83 " , "replace" , "abc\ufffd \ufffd \ufffd " ),
37+ (b"\x82 \xFC xy" , "replace" , "\ufffd \ufffd xy" ),
38+ (b"\xc1 \x64 " , "strict" , None ),
39+ (b"\xa1 \xc0 " , "strict" , "\uff3c " ),
40+ (b"\xa1 \xc0 \\ " , "strict" , "\uff3c \\ " ),
41+ (b"\x8e XY" , "replace" , "\ufffd XY" ),
42+ )
43+
44+ class Test_EUC_JIS_2004 (test_multibytecodec_support .TestBase ,
45+ unittest .TestCase ):
46+ encoding = 'euc_jis_2004'
47+ tstring = test_multibytecodec_support .load_teststring ('euc_jisx0213' )
48+ codectests = euc_commontests
49+ xmlcharnametest = (
50+ "\xab \u211c \xbb = \u2329 \u1234 \u232a " ,
51+ b"\xa9 \xa8 ℜ\xa9 \xb2 = ⟨ሴ⟩"
52+ )
53+
2654class Test_EUC_JISX0213 (test_multibytecodec_support .TestBase ,
2755 unittest .TestCase ):
2856 encoding = 'euc_jisx0213'
2957 tstring = test_multibytecodec_support .load_teststring ('euc_jisx0213' )
30- codectests = (
31- # invalid bytes
32- (b"abc\x80 \x80 \xc1 \xc4 " , "strict" , None ),
33- (b"abc\xc8 " , "strict" , None ),
34- (b"abc\x80 \x80 \xc1 \xc4 " , "replace" , "abc\ufffd \u7956 " ),
35- (b"abc\x80 \x80 \xc1 \xc4 \xc8 " , "replace" , "abc\ufffd \u7956 \ufffd " ),
36- (b"abc\x80 \x80 \xc1 \xc4 " , "ignore" , "abc\u7956 " ),
37- (b"abc\x8f \x83 \x83 " , "replace" , "abc\ufffd " ),
38- (b"\xc1 \x64 " , "strict" , None ),
39- (b"\xa1 \xc0 " , "strict" , "\uff3c " ),
40- )
58+ codectests = euc_commontests
4159 xmlcharnametest = (
4260 "\xab \u211c \xbb = \u2329 \u1234 \u232a " ,
4361 b"\xa9 \xa8 ℜ\xa9 \xb2 = ⟨ሴ⟩"
4462 )
4563
46- eucjp_commontests = (
47- (b"abc\x80 \x80 \xc1 \xc4 " , "strict" , None ),
48- (b"abc\xc8 " , "strict" , None ),
49- (b"abc\x80 \x80 \xc1 \xc4 " , "replace" , "abc\ufffd \u7956 " ),
50- (b"abc\x80 \x80 \xc1 \xc4 \xc8 " , "replace" , "abc\ufffd \u7956 \ufffd " ),
51- (b"abc\x80 \x80 \xc1 \xc4 " , "ignore" , "abc\u7956 " ),
52- (b"abc\x8f \x83 \x83 " , "replace" , "abc\ufffd " ),
53- (b"\xc1 \x64 " , "strict" , None ),
54- )
55-
5664class Test_EUC_JP_COMPAT (test_multibytecodec_support .TestBase ,
5765 unittest .TestCase ):
5866 encoding = 'euc_jp'
5967 tstring = test_multibytecodec_support .load_teststring ('euc_jp' )
60- codectests = eucjp_commontests + (
61- (b"\xa1 \xc0 \\ " , "strict" , "\uff3c \\ " ),
68+ codectests = euc_commontests + (
6269 ("\xa5 " , "strict" , b"\x5c " ),
6370 ("\u203e " , "strict" , b"\x7e " ),
6471 )
6572
6673shiftjis_commonenctests = (
6774 (b"abc\x80 \x80 \x82 \x84 " , "strict" , None ),
6875 (b"abc\xf8 " , "strict" , None ),
69- (b"abc\x80 \x80 \x82 \x84 " , "replace" , "abc\ufffd \uff44 " ),
70- (b"abc\x80 \x80 \x82 \x84 \x88 " , "replace" , "abc\ufffd \uff44 \ufffd " ),
7176 (b"abc\x80 \x80 \x82 \x84 def" , "ignore" , "abc\uff44 def" ),
7277)
7378
7479class Test_SJIS_COMPAT (test_multibytecodec_support .TestBase , unittest .TestCase ):
7580 encoding = 'shift_jis'
7681 tstring = test_multibytecodec_support .load_teststring ('shift_jis' )
7782 codectests = shiftjis_commonenctests + (
83+ (b"abc\x80 \x80 \x82 \x84 " , "replace" , "abc\ufffd \ufffd \uff44 " ),
84+ (b"abc\x80 \x80 \x82 \x84 \x88 " , "replace" , "abc\ufffd \ufffd \uff44 \ufffd " ),
85+
7886 (b"\\ \x7e " , "strict" , "\\ \x7e " ),
7987 (b"\x81 \x5f \x81 \x61 \x81 \x7c " , "strict" , "\uff3c \u2016 \u2212 " ),
88+ (b"abc\x81 \x39 " , "replace" , "abc\ufffd 9" ),
89+ (b"abc\xEA \xFC " , "replace" , "abc\ufffd \ufffd " ),
90+ (b"abc\xFF \x58 " , "replace" , "abc\ufffd X" ),
91+ )
92+
93+ class Test_SJIS_2004 (test_multibytecodec_support .TestBase , unittest .TestCase ):
94+ encoding = 'shift_jis_2004'
95+ tstring = test_multibytecodec_support .load_teststring ('shift_jis' )
96+ codectests = shiftjis_commonenctests + (
97+ (b"\\ \x7e " , "strict" , "\xa5 \u203e " ),
98+ (b"\x81 \x5f \x81 \x61 \x81 \x7c " , "strict" , "\\ \u2016 \u2212 " ),
99+ (b"abc\xEA \xFC " , "strict" , "abc\u64bf " ),
100+ (b"\x81 \x39 xy" , "replace" , "\ufffd 9xy" ),
101+ (b"\xFF \x58 xy" , "replace" , "\ufffd Xxy" ),
102+ (b"\x80 \x80 \x82 \x84 xy" , "replace" , "\ufffd \ufffd \uff44 xy" ),
103+ (b"\x80 \x80 \x82 \x84 \x88 xy" , "replace" , "\ufffd \ufffd \uff44 \u5864 y" ),
104+ (b"\xFC \xFB xy" , "replace" , '\ufffd \u95b4 y' ),
105+ )
106+ xmlcharnametest = (
107+ "\xab \u211c \xbb = \u2329 \u1234 \u232a " ,
108+ b"\x85 Gℜ\x85 Q = ⟨ሴ⟩"
80109 )
81110
82111class Test_SJISX0213 (test_multibytecodec_support .TestBase , unittest .TestCase ):
83112 encoding = 'shift_jisx0213'
84113 tstring = test_multibytecodec_support .load_teststring ('shift_jisx0213' )
85- codectests = (
86- # invalid bytes
87- (b"abc\x80 \x80 \x82 \x84 " , "strict" , None ),
88- (b"abc\xf8 " , "strict" , None ),
89- (b"abc\x80 \x80 \x82 \x84 " , "replace" , "abc\ufffd \uff44 " ),
90- (b"abc\x80 \x80 \x82 \x84 \x88 " , "replace" , "abc\ufffd \uff44 \ufffd " ),
91- (b"abc\x80 \x80 \x82 \x84 def" , "ignore" , "abc\uff44 def" ),
114+ codectests = shiftjis_commonenctests + (
115+ (b"abc\x80 \x80 \x82 \x84 " , "replace" , "abc\ufffd \ufffd \uff44 " ),
116+ (b"abc\x80 \x80 \x82 \x84 \x88 " , "replace" , "abc\ufffd \ufffd \uff44 \ufffd " ),
117+
92118 # sjis vs cp932
93119 (b"\\ \x7e " , "replace" , "\xa5 \u203e " ),
94120 (b"\x81 \x5f \x81 \x61 \x81 \x7c " , "replace" , "\x5c \u2016 \u2212 " ),
0 commit comments