Skip to content

Commit 2cded9c

Browse files
author
Victor Stinner
committed
Issue python#12016: Multibyte CJK decoders now resynchronize faster
They only ignore the first byte of an invalid byte sequence. For example, b'\xff\n'.decode('gb2312', 'replace') gives '\ufffd\n' instead of '\ufffd'.
1 parent 081fe46 commit 2cded9c

File tree

13 files changed

+159
-93
lines changed

13 files changed

+159
-93
lines changed

Doc/whatsnew/3.3.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,29 @@ New, Improved, and Deprecated Modules
6868

6969
* Stub
7070

71+
codecs
72+
------
73+
74+
Multibyte CJK decoders now resynchronize faster. They only ignore the first
75+
byte of an invalid byte sequence. For example, b'\xff\n'.decode('gb2312',
76+
'replace') gives '\ufffd\n' instead of '\ufffd'.
77+
78+
(http://bugs.python.org/issue12016)
79+
80+
Don't reset incremental encoders of CJK codecs at each call to their encode()
81+
method anymore. For example: ::
82+
83+
$ ./python -q
84+
>>> import codecs
85+
>>> encoder = codecs.getincrementalencoder('hz')('strict')
86+
>>> b''.join(encoder.encode(x) for x in '\u52ff\u65bd\u65bc\u4eba\u3002 Bye.')
87+
b'~{NpJ)l6HK!#~} Bye.'
88+
89+
This example gives b'~{Np~}~{J)~}~{l6~}~{HK~}~{!#~} Bye.' with older Python
90+
versions.
91+
92+
(http://bugs.python.org/issue12100)
93+
7194
faulthandler
7295
------------
7396

Lib/test/test_codecencodings_cn.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
1515
# invalid bytes
1616
(b"abc\x81\x81\xc1\xc4", "strict", None),
1717
(b"abc\xc8", "strict", None),
18-
(b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\u804a"),
19-
(b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
18+
(b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
19+
(b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
2020
(b"abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"),
2121
(b"\xc1\x64", "strict", None),
2222
)
@@ -28,8 +28,8 @@ class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
2828
# invalid bytes
2929
(b"abc\x80\x80\xc1\xc4", "strict", None),
3030
(b"abc\xc8", "strict", None),
31-
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
32-
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
31+
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
32+
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
3333
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
3434
(b"\x83\x34\x83\x31", "strict", None),
3535
("\u30fb", "strict", None),
@@ -42,11 +42,14 @@ class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
4242
# invalid bytes
4343
(b"abc\x80\x80\xc1\xc4", "strict", None),
4444
(b"abc\xc8", "strict", None),
45-
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
46-
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
45+
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
46+
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
4747
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
48-
(b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd\u804a"),
48+
(b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd9\ufffd9\u804a"),
4949
("\u30fb", "strict", b"\x819\xa79"),
50+
(b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'),
51+
(b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'),
52+
(b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'),
5053
)
5154
has_iso10646 = True
5255

@@ -74,9 +77,11 @@ class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
7477
'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
7578
'Bye.\n'),
7679
# invalid bytes
77-
(b'ab~cd', 'replace', 'ab\uFFFDd'),
80+
(b'ab~cd', 'replace', 'ab\uFFFDcd'),
7881
(b'ab\xffcd', 'replace', 'ab\uFFFDcd'),
7982
(b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
83+
(b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
84+
(b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
8085
)
8186

8287
def test_main():

Lib/test/test_codecencodings_hk.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ class Test_Big5HKSCS(test_multibytecodec_support.TestBase, unittest.TestCase):
1515
# invalid bytes
1616
(b"abc\x80\x80\xc1\xc4", "strict", None),
1717
(b"abc\xc8", "strict", None),
18-
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
19-
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
18+
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u8b10"),
19+
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u8b10\ufffd"),
2020
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
2121
)
2222

Lib/test/test_codecencodings_jp.py

Lines changed: 61 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -15,80 +15,106 @@ class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
1515
# invalid bytes
1616
(b"abc\x81\x00\x81\x00\x82\x84", "strict", None),
1717
(b"abc\xf8", "strict", None),
18-
(b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\uff44"),
19-
(b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
20-
(b"abc\x81\x00\x82\x84", "ignore", "abc\uff44"),
18+
(b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"),
19+
(b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"),
20+
(b"abc\x81\x00\x82\x84", "ignore", "abc\x00\uff44"),
21+
(b"ab\xEBxy", "replace", "ab\uFFFDxy"),
22+
(b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"),
23+
(b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'),
2124
# sjis vs cp932
2225
(b"\\\x7e", "replace", "\\\x7e"),
2326
(b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
2427
)
2528

29+
euc_commontests = (
30+
# invalid bytes
31+
(b"abc\x80\x80\xc1\xc4", "strict", None),
32+
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u7956"),
33+
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u7956\ufffd"),
34+
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
35+
(b"abc\xc8", "strict", None),
36+
(b"abc\x8f\x83\x83", "replace", "abc\ufffd\ufffd\ufffd"),
37+
(b"\x82\xFCxy", "replace", "\ufffd\ufffdxy"),
38+
(b"\xc1\x64", "strict", None),
39+
(b"\xa1\xc0", "strict", "\uff3c"),
40+
(b"\xa1\xc0\\", "strict", "\uff3c\\"),
41+
(b"\x8eXY", "replace", "\ufffdXY"),
42+
)
43+
44+
class Test_EUC_JIS_2004(test_multibytecodec_support.TestBase,
45+
unittest.TestCase):
46+
encoding = 'euc_jis_2004'
47+
tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
48+
codectests = euc_commontests
49+
xmlcharnametest = (
50+
"\xab\u211c\xbb = \u2329\u1234\u232a",
51+
b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
52+
)
53+
2654
class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
2755
unittest.TestCase):
2856
encoding = 'euc_jisx0213'
2957
tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
30-
codectests = (
31-
# invalid bytes
32-
(b"abc\x80\x80\xc1\xc4", "strict", None),
33-
(b"abc\xc8", "strict", None),
34-
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
35-
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
36-
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
37-
(b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
38-
(b"\xc1\x64", "strict", None),
39-
(b"\xa1\xc0", "strict", "\uff3c"),
40-
)
58+
codectests = euc_commontests
4159
xmlcharnametest = (
4260
"\xab\u211c\xbb = \u2329\u1234\u232a",
4361
b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
4462
)
4563

46-
eucjp_commontests = (
47-
(b"abc\x80\x80\xc1\xc4", "strict", None),
48-
(b"abc\xc8", "strict", None),
49-
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
50-
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
51-
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
52-
(b"abc\x8f\x83\x83", "replace", "abc\ufffd"),
53-
(b"\xc1\x64", "strict", None),
54-
)
55-
5664
class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
5765
unittest.TestCase):
5866
encoding = 'euc_jp'
5967
tstring = test_multibytecodec_support.load_teststring('euc_jp')
60-
codectests = eucjp_commontests + (
61-
(b"\xa1\xc0\\", "strict", "\uff3c\\"),
68+
codectests = euc_commontests + (
6269
("\xa5", "strict", b"\x5c"),
6370
("\u203e", "strict", b"\x7e"),
6471
)
6572

6673
shiftjis_commonenctests = (
6774
(b"abc\x80\x80\x82\x84", "strict", None),
6875
(b"abc\xf8", "strict", None),
69-
(b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
70-
(b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
7176
(b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
7277
)
7378

7479
class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
7580
encoding = 'shift_jis'
7681
tstring = test_multibytecodec_support.load_teststring('shift_jis')
7782
codectests = shiftjis_commonenctests + (
83+
(b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
84+
(b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),
85+
7886
(b"\\\x7e", "strict", "\\\x7e"),
7987
(b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
88+
(b"abc\x81\x39", "replace", "abc\ufffd9"),
89+
(b"abc\xEA\xFC", "replace", "abc\ufffd\ufffd"),
90+
(b"abc\xFF\x58", "replace", "abc\ufffdX"),
91+
)
92+
93+
class Test_SJIS_2004(test_multibytecodec_support.TestBase, unittest.TestCase):
94+
encoding = 'shift_jis_2004'
95+
tstring = test_multibytecodec_support.load_teststring('shift_jis')
96+
codectests = shiftjis_commonenctests + (
97+
(b"\\\x7e", "strict", "\xa5\u203e"),
98+
(b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"),
99+
(b"abc\xEA\xFC", "strict", "abc\u64bf"),
100+
(b"\x81\x39xy", "replace", "\ufffd9xy"),
101+
(b"\xFF\x58xy", "replace", "\ufffdXxy"),
102+
(b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"),
103+
(b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"),
104+
(b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'),
105+
)
106+
xmlcharnametest = (
107+
"\xab\u211c\xbb = \u2329\u1234\u232a",
108+
b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
80109
)
81110

82111
class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
83112
encoding = 'shift_jisx0213'
84113
tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
85-
codectests = (
86-
# invalid bytes
87-
(b"abc\x80\x80\x82\x84", "strict", None),
88-
(b"abc\xf8", "strict", None),
89-
(b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
90-
(b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
91-
(b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
114+
codectests = shiftjis_commonenctests + (
115+
(b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
116+
(b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),
117+
92118
# sjis vs cp932
93119
(b"\\\x7e", "replace", "\xa5\u203e"),
94120
(b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),

Lib/test/test_codecencodings_kr.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ class Test_CP949(test_multibytecodec_support.TestBase, unittest.TestCase):
1515
# invalid bytes
1616
(b"abc\x80\x80\xc1\xc4", "strict", None),
1717
(b"abc\xc8", "strict", None),
18-
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
19-
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
18+
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\uc894"),
19+
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"),
2020
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
2121
)
2222

@@ -27,8 +27,8 @@ class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
2727
# invalid bytes
2828
(b"abc\x80\x80\xc1\xc4", "strict", None),
2929
(b"abc\xc8", "strict", None),
30-
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
31-
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
30+
(b"abc\x80\x80\xc1\xc4", "replace", 'abc\ufffd\ufffd\uc894'),
31+
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\uc894\ufffd"),
3232
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\uc894"),
3333

3434
# composed make-up sequence errors
@@ -40,13 +40,14 @@ class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
4040
(b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4", "strict", None),
4141
(b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "strict", "\uc4d4"),
4242
(b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4x", "strict", "\uc4d4x"),
43-
(b"a\xa4\xd4\xa4\xb6\xa4", "replace", "a\ufffd"),
43+
(b"a\xa4\xd4\xa4\xb6\xa4", "replace", 'a\ufffd'),
4444
(b"\xa4\xd4\xa3\xb6\xa4\xd0\xa4\xd4", "strict", None),
4545
(b"\xa4\xd4\xa4\xb6\xa3\xd0\xa4\xd4", "strict", None),
4646
(b"\xa4\xd4\xa4\xb6\xa4\xd0\xa3\xd4", "strict", None),
47-
(b"\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", "\ufffd"),
48-
(b"\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", "\ufffd"),
49-
(b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", "\ufffd"),
47+
(b"\xa4\xd4\xa4\xff\xa4\xd0\xa4\xd4", "replace", '\ufffd\u6e21\ufffd\u3160\ufffd'),
48+
(b"\xa4\xd4\xa4\xb6\xa4\xff\xa4\xd4", "replace", '\ufffd\u6e21\ub544\ufffd\ufffd'),
49+
(b"\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xff", "replace", '\ufffd\u6e21\ub544\u572d\ufffd'),
50+
(b"\xa4\xd4\xff\xa4\xd4\xa4\xb6\xa4\xd0\xa4\xd4", "replace", '\ufffd\ufffd\ufffd\uc4d4'),
5051
(b"\xc1\xc4", "strict", "\uc894"),
5152
)
5253

@@ -57,9 +58,13 @@ class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
5758
# invalid bytes
5859
(b"abc\x80\x80\xc1\xc4", "strict", None),
5960
(b"abc\xc8", "strict", None),
60-
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ucd27"),
61-
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ucd27\ufffd"),
61+
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\ucd27"),
62+
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\ucd27\ufffd"),
6263
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\ucd27"),
64+
(b"\xD8abc", "replace", "\uFFFDabc"),
65+
(b"\xD8\xFFabc", "replace", "\uFFFD\uFFFDabc"),
66+
(b"\x84bxy", "replace", "\uFFFDbxy"),
67+
(b"\x8CBxy", "replace", "\uFFFDBxy"),
6368
)
6469

6570
def test_main():

Lib/test/test_codecencodings_tw.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ class Test_Big5(test_multibytecodec_support.TestBase, unittest.TestCase):
1515
# invalid bytes
1616
(b"abc\x80\x80\xc1\xc4", "strict", None),
1717
(b"abc\xc8", "strict", None),
18-
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
19-
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
18+
(b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u8b10"),
19+
(b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u8b10\ufffd"),
2020
(b"abc\x80\x80\xc1\xc4", "ignore", "abc\u8b10"),
2121
)
2222

Lib/test/test_codecmaps_tw.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ class TestCP950Map(test_multibytecodec_support.TestBase_Mapping,
2323
(b'\xa2\xcc', '\u5341'),
2424
(b'\xa2\xce', '\u5345'),
2525
]
26+
codectests = (
27+
(b"\xFFxy", "replace", "\ufffdxy"),
28+
)
2629

2730
def test_main():
2831
support.run_unittest(__name__)

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,10 @@ Core and Builtins
219219
Library
220220
-------
221221

222+
- Issue #12016: Multibyte CJK decoders now resynchronize faster. They only
223+
ignore the first byte of an invalid byte sequence. For example,
224+
b'\xff\n'.decode('gb2312', 'replace') gives '\ufffd\n' instead of '\ufffd'.
225+
222226
- Issue #12459: time.sleep() now raises a ValueError if the sleep length is
223227
negative, instead of an infinite sleep on Windows or raising an IOError on
224228
Linux for example, to have the same behaviour on all platforms.

Modules/cjkcodecs/_codecs_cn.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ DECODER(gb2312)
8585
TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
8686
NEXT(2, 1)
8787
}
88-
else return 2;
88+
else return 1;
8989
}
9090

9191
return 0;
@@ -141,7 +141,7 @@ DECODER(gbk)
141141
REQUIRE_INBUF(2)
142142

143143
GBK_DECODE(c, IN2, **outbuf)
144-
else return 2;
144+
else return 1;
145145

146146
NEXT(2, 1)
147147
}
@@ -267,7 +267,7 @@ DECODER(gb18030)
267267
c3 = IN3;
268268
c4 = IN4;
269269
if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
270-
return 4;
270+
return 1;
271271
c -= 0x81; c2 -= 0x30;
272272
c3 -= 0x81; c4 -= 0x30;
273273

@@ -292,12 +292,12 @@ DECODER(gb18030)
292292
continue;
293293
}
294294
}
295-
return 4;
295+
return 1;
296296
}
297297

298298
GBK_DECODE(c, c2, **outbuf)
299299
else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
300-
else return 2;
300+
else return 1;
301301

302302
NEXT(2, 1)
303303
}
@@ -400,7 +400,7 @@ DECODER(hz)
400400
else if (c2 == '\n')
401401
; /* line-continuation */
402402
else
403-
return 2;
403+
return 1;
404404
NEXT(2, 0);
405405
continue;
406406
}
@@ -419,7 +419,7 @@ DECODER(hz)
419419
NEXT(2, 1)
420420
}
421421
else
422-
return 2;
422+
return 1;
423423
}
424424
}
425425

Modules/cjkcodecs/_codecs_hk.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ DECODER(big5hkscs)
161161
case 0x8864: WRITE2(0x00ca, 0x030c); break;
162162
case 0x88a3: WRITE2(0x00ea, 0x0304); break;
163163
case 0x88a5: WRITE2(0x00ea, 0x030c); break;
164-
default: return 2;
164+
default: return 1;
165165
}
166166

167167
NEXT(2, 2) /* all decoded codepoints are pairs, above. */

0 commit comments

Comments
 (0)