Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 4 additions & 10 deletions Doc/library/codecs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1316,16 +1316,10 @@ encodings.
| | | code actually uses UTF-8 |
| | | by default. |
+--------------------+---------+---------------------------+
| unicode_internal | | Return the internal |
| | | representation of the |
| | | operand. Stateful codecs |
| | | are not supported. |
| | | |
| | | .. deprecated:: 3.3 |
| | | This representation is |
| | | obsoleted by |
| | | :pep:`393`. |
+--------------------+---------+---------------------------+

.. versionchanged:: 3.8
The ``unicode_internal`` codec has been removed.


.. _binary-transforms:

Expand Down
3 changes: 3 additions & 0 deletions Doc/whatsnew/3.8.rst
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,9 @@ The following features and APIs have been removed from Python 3.8:
* Removed the ``doctype()`` method of :class:`~xml.etree.ElementTree.XMLParser`.
(Contributed by Serhiy Storchaka in :issue:`29209`.)

* The ``unicode_internal`` codec has been removed.
(Contributed by Inada Naoki in :issue:`36297`.)


Porting to Python 3.8
=====================
Expand Down
9 changes: 0 additions & 9 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -896,15 +896,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
Py_ssize_t length /* Number of Py_UNICODE chars to encode */
) Py_DEPRECATED(3.3);

/* --- Unicode Internal Codec --------------------------------------------- */

/* Only for internal use in _codecsmodule.c */
PyObject *_PyUnicode_DecodeUnicodeInternal(
const char *string,
Py_ssize_t length,
const char *errors
);

/* --- Latin-1 Codecs ----------------------------------------------------- */

PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
Expand Down
45 changes: 0 additions & 45 deletions Lib/encodings/unicode_internal.py

This file was deleted.

66 changes: 11 additions & 55 deletions Lib/test/test_codeccallbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,42 +211,6 @@ def test_charmapencode(self):
charmap[ord("?")] = "XYZ" # wrong type in mapping
self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)

def test_decodeunicodeinternal(self):
with test.support.check_warnings(('unicode_internal codec has been '
'deprecated', DeprecationWarning)):
self.assertRaises(
UnicodeDecodeError,
b"\x00\x00\x00\x00\x00".decode,
"unicode-internal",
)
if len('\0'.encode('unicode-internal')) == 4:
def handler_unicodeinternal(exc):
if not isinstance(exc, UnicodeDecodeError):
raise TypeError("don't know how to handle %r" % exc)
return ("\x01", 1)

self.assertEqual(
b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
"\u0000"
)

self.assertEqual(
b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
"\u0000\ufffd"
)

self.assertEqual(
b"\x00\x00\x00\x00\x00".decode("unicode-internal", "backslashreplace"),
"\u0000\\x00"
)

codecs.register_error("test.hui", handler_unicodeinternal)

self.assertEqual(
b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
"\u0000\u0001\u0000"
)

def test_callbacks(self):
def handler1(exc):
r = range(exc.start, exc.end)
Expand Down Expand Up @@ -794,16 +758,13 @@ def test_badhandlerresults(self):
("ascii", b"\xff"),
("utf-8", b"\xff"),
("utf-7", b"+x-"),
("unicode-internal", b"\x00"),
):
with test.support.check_warnings():
# unicode-internal has been deprecated
self.assertRaises(
TypeError,
bytes.decode,
enc,
"test.badhandler"
)
self.assertRaises(
TypeError,
bytes.decode,
enc,
"test.badhandler"
)

def test_lookup(self):
self.assertEqual(codecs.strict_errors, codecs.lookup_error("strict"))
Expand Down Expand Up @@ -1013,7 +974,6 @@ def test_mutatingdecodehandler(self):
("utf-32", b"\xff"),
("unicode-escape", b"\\u123g"),
("raw-unicode-escape", b"\\u123g"),
("unicode-internal", b"\xff"),
]

def replacing(exc):
Expand All @@ -1024,11 +984,9 @@ def replacing(exc):
raise TypeError("don't know how to handle %r" % exc)
codecs.register_error("test.replacing", replacing)

with test.support.check_warnings():
# unicode-internal has been deprecated
for (encoding, data) in baddata:
with self.assertRaises(TypeError):
data.decode(encoding, "test.replacing")
for (encoding, data) in baddata:
with self.assertRaises(TypeError):
data.decode(encoding, "test.replacing")

def mutating(exc):
if isinstance(exc, UnicodeDecodeError):
Expand All @@ -1039,10 +997,8 @@ def mutating(exc):
codecs.register_error("test.mutating", mutating)
# If the decoder doesn't pick up the modified input the following
# will lead to an endless loop
with test.support.check_warnings():
# unicode-internal has been deprecated
for (encoding, data) in baddata:
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
for (encoding, data) in baddata:
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")

# issue32583
def test_crashing_decode_handler(self):
Expand Down
107 changes: 5 additions & 102 deletions Lib/test/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1239,16 +1239,6 @@ def test_errors(self):
self.assertEqual(decode(br"[\x0]\x0", "replace"), (b"[?]?", 8))


class RecodingTest(unittest.TestCase):
def test_recoding(self):
f = io.BytesIO()
with codecs.EncodedFile(f, "unicode_internal", "utf-8") as f2:
f2.write("a")
# Python used to crash on this at exit because of a refcount
# bug in _codecsmodule.c

self.assertTrue(f.closed)

# From RFC 3492
punycode_testcases = [
# A Arabic (Egyptian):
Expand Down Expand Up @@ -1378,87 +1368,6 @@ def test_decode(self):
self.assertEqual(uni, puny.decode("punycode"))


class UnicodeInternalTest(unittest.TestCase):
@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
def test_bug1251300(self):
# Decoding with unicode_internal used to not correctly handle "code
# points" above 0x10ffff on UCS-4 builds.
ok = [
(b"\x00\x10\xff\xff", "\U0010ffff"),
(b"\x00\x00\x01\x01", "\U00000101"),
(b"", ""),
]
not_ok = [
b"\x7f\xff\xff\xff",
b"\x80\x00\x00\x00",
b"\x81\x00\x00\x00",
b"\x00",
b"\x00\x00\x00\x00\x00",
]
for internal, uni in ok:
if sys.byteorder == "little":
internal = bytes(reversed(internal))
with support.check_warnings():
self.assertEqual(uni, internal.decode("unicode_internal"))
for internal in not_ok:
if sys.byteorder == "little":
internal = bytes(reversed(internal))
with support.check_warnings(('unicode_internal codec has been '
'deprecated', DeprecationWarning)):
self.assertRaises(UnicodeDecodeError, internal.decode,
"unicode_internal")
if sys.byteorder == "little":
invalid = b"\x00\x00\x11\x00"
invalid_backslashreplace = r"\x00\x00\x11\x00"
else:
invalid = b"\x00\x11\x00\x00"
invalid_backslashreplace = r"\x00\x11\x00\x00"
with support.check_warnings():
self.assertRaises(UnicodeDecodeError,
invalid.decode, "unicode_internal")
with support.check_warnings():
self.assertEqual(invalid.decode("unicode_internal", "replace"),
'\ufffd')
with support.check_warnings():
self.assertEqual(invalid.decode("unicode_internal", "backslashreplace"),
invalid_backslashreplace)

@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
def test_decode_error_attributes(self):
try:
with support.check_warnings(('unicode_internal codec has been '
'deprecated', DeprecationWarning)):
b"\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
except UnicodeDecodeError as ex:
self.assertEqual("unicode_internal", ex.encoding)
self.assertEqual(b"\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
self.assertEqual(4, ex.start)
self.assertEqual(8, ex.end)
else:
self.fail()

@unittest.skipUnless(SIZEOF_WCHAR_T == 4, 'specific to 32-bit wchar_t')
def test_decode_callback(self):
codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
decoder = codecs.getdecoder("unicode_internal")
with support.check_warnings(('unicode_internal codec has been '
'deprecated', DeprecationWarning)):
ab = "ab".encode("unicode_internal").decode()
ignored = decoder(bytes("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
"ascii"),
"UnicodeInternalTest")
self.assertEqual(("ab", 12), ignored)

def test_encode_length(self):
with support.check_warnings(('unicode_internal codec has been '
'deprecated', DeprecationWarning)):
# Issue 3739
encoder = codecs.getencoder("unicode_internal")
self.assertEqual(encoder("a")[1], 1)
self.assertEqual(encoder("\xe9\u0142")[1], 2)

self.assertEqual(codecs.escape_encode(br'\x00')[1], 4)

# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
nameprep_tests = [
# 3.1 Map to nothing.
Expand Down Expand Up @@ -1949,7 +1858,6 @@ def test_basic(self):
"shift_jisx0213",
"tis_620",
"unicode_escape",
"unicode_internal",
"utf_16",
"utf_16_be",
"utf_16_le",
Expand All @@ -1969,7 +1877,6 @@ def test_basic(self):
# The following encodings don't work in stateful mode
broken_unicode_with_stateful = [
"punycode",
"unicode_internal"
]


Expand All @@ -1984,12 +1891,10 @@ def test_basics(self):
name = "latin_1"
self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))

with support.check_warnings():
# unicode-internal has been deprecated
(b, size) = codecs.getencoder(encoding)(s)
self.assertEqual(size, len(s), "encoding=%r" % encoding)
(chars, size) = codecs.getdecoder(encoding)(b)
self.assertEqual(chars, s, "encoding=%r" % encoding)
(b, size) = codecs.getencoder(encoding)(s)
self.assertEqual(size, len(s), "encoding=%r" % encoding)
(chars, size) = codecs.getdecoder(encoding)(b)
self.assertEqual(chars, s, "encoding=%r" % encoding)

if encoding not in broken_unicode_with_stateful:
# check stream reader/writer
Expand Down Expand Up @@ -2116,9 +2021,7 @@ def test_bad_decode_args(self):
def test_bad_encode_args(self):
for encoding in all_unicode_encodings:
encoder = codecs.getencoder(encoding)
with support.check_warnings():
# unicode-internal has been deprecated
self.assertRaises(TypeError, encoder)
self.assertRaises(TypeError, encoder)

def test_encoding_map_type_initialized(self):
from encodings import cp1140
Expand Down
Loading