@@ -158,6 +158,22 @@ def test_backslashescape(self):
158158 sout = b"a\xac \\ u1234\xa4 \\ u8000\\ U0010ffff"
159159 self .assertEqual (sin .encode ("iso-8859-15" , "backslashreplace" ), sout )
160160
161+ def test_nameescape (self ):
162+ # Does the same as backslashescape, but prefers ``\N{...}`` escape
163+ # sequences.
164+ sin = "a\xac \u1234 \u20ac \u8000 \U0010ffff "
165+ sout = (b'a\\ N{NOT SIGN}\\ N{ETHIOPIC SYLLABLE SEE}\\ N{EURO SIGN}'
166+ b'\\ N{CJK UNIFIED IDEOGRAPH-8000}\\ U0010ffff' )
167+ self .assertEqual (sin .encode ("ascii" , "namereplace" ), sout )
168+
169+ sout = (b'a\xac \\ N{ETHIOPIC SYLLABLE SEE}\\ N{EURO SIGN}'
170+ b'\\ N{CJK UNIFIED IDEOGRAPH-8000}\\ U0010ffff' )
171+ self .assertEqual (sin .encode ("latin-1" , "namereplace" ), sout )
172+
173+ sout = (b'a\xac \\ N{ETHIOPIC SYLLABLE SEE}\xa4 '
174+ b'\\ N{CJK UNIFIED IDEOGRAPH-8000}\\ U0010ffff' )
175+ self .assertEqual (sin .encode ("iso-8859-15" , "namereplace" ), sout )
176+
161177 def test_decoding_callbacks (self ):
162178 # This is a test for a decoding callback handler
163179 # that allows the decoding of the invalid sequence
@@ -297,7 +313,7 @@ def handler2(exc):
297313 def test_longstrings (self ):
298314 # test long strings to check for memory overflow problems
299315 errors = [ "strict" , "ignore" , "replace" , "xmlcharrefreplace" ,
300- "backslashreplace" ]
316+ "backslashreplace" , "namereplace" ]
301317 # register the handlers under different names,
302318 # to prevent the codec from recognizing the name
303319 for err in errors :
@@ -611,6 +627,81 @@ def test_badandgoodbackslashreplaceexceptions(self):
611627 ("\\ udfff" , 1 )
612628 )
613629
630+ def test_badandgoodnamereplaceexceptions (self ):
631+ # "namereplace" complains about a non-exception passed in
632+ self .assertRaises (
633+ TypeError ,
634+ codecs .namereplace_errors ,
635+ 42
636+ )
637+ # "namereplace" complains about the wrong exception types
638+ self .assertRaises (
639+ TypeError ,
640+ codecs .namereplace_errors ,
641+ UnicodeError ("ouch" )
642+ )
643+ # "namereplace" can only be used for encoding
644+ self .assertRaises (
645+ TypeError ,
646+ codecs .namereplace_errors ,
647+ UnicodeDecodeError ("ascii" , bytearray (b"\xff " ), 0 , 1 , "ouch" )
648+ )
649+ self .assertRaises (
650+ TypeError ,
651+ codecs .namereplace_errors ,
652+ UnicodeTranslateError ("\u3042 " , 0 , 1 , "ouch" )
653+ )
654+ # Use the correct exception
655+ self .assertEqual (
656+ codecs .namereplace_errors (
657+ UnicodeEncodeError ("ascii" , "\u3042 " , 0 , 1 , "ouch" )),
658+ ("\\ N{HIRAGANA LETTER A}" , 1 )
659+ )
660+ self .assertEqual (
661+ codecs .namereplace_errors (
662+ UnicodeEncodeError ("ascii" , "\x00 " , 0 , 1 , "ouch" )),
663+ ("\\ x00" , 1 )
664+ )
665+ self .assertEqual (
666+ codecs .namereplace_errors (
667+ UnicodeEncodeError ("ascii" , "\xff " , 0 , 1 , "ouch" )),
668+ ("\\ N{LATIN SMALL LETTER Y WITH DIAERESIS}" , 1 )
669+ )
670+ self .assertEqual (
671+ codecs .namereplace_errors (
672+ UnicodeEncodeError ("ascii" , "\u0100 " , 0 , 1 , "ouch" )),
673+ ("\\ N{LATIN CAPITAL LETTER A WITH MACRON}" , 1 )
674+ )
675+ self .assertEqual (
676+ codecs .namereplace_errors (
677+ UnicodeEncodeError ("ascii" , "\uffff " , 0 , 1 , "ouch" )),
678+ ("\\ uffff" , 1 )
679+ )
680+ if SIZEOF_WCHAR_T > 0 :
681+ self .assertEqual (
682+ codecs .namereplace_errors (
683+ UnicodeEncodeError ("ascii" , "\U00010000 " ,
684+ 0 , 1 , "ouch" )),
685+ ("\\ N{LINEAR B SYLLABLE B008 A}" , 1 )
686+ )
687+ self .assertEqual (
688+ codecs .namereplace_errors (
689+ UnicodeEncodeError ("ascii" , "\U0010ffff " ,
690+ 0 , 1 , "ouch" )),
691+ ("\\ U0010ffff" , 1 )
692+ )
693+ # Lone surrogates (regardless of unicode width)
694+ self .assertEqual (
695+ codecs .namereplace_errors (
696+ UnicodeEncodeError ("ascii" , "\ud800 " , 0 , 1 , "ouch" )),
697+ ("\\ ud800" , 1 )
698+ )
699+ self .assertEqual (
700+ codecs .namereplace_errors (
701+ UnicodeEncodeError ("ascii" , "\udfff " , 0 , 1 , "ouch" )),
702+ ("\\ udfff" , 1 )
703+ )
704+
614705 def test_badhandlerresults (self ):
615706 results = ( 42 , "foo" , (1 ,2 ,3 ), ("foo" , 1 , 3 ), ("foo" , None ), ("foo" ,), ("foo" , 1 , 3 ), ("foo" , None ), ("foo" ,) )
616707 encs = ("ascii" , "latin-1" , "iso-8859-1" , "iso-8859-15" )
@@ -651,6 +742,10 @@ def test_lookup(self):
651742 codecs .backslashreplace_errors ,
652743 codecs .lookup_error ("backslashreplace" )
653744 )
745+ self .assertEqual (
746+ codecs .namereplace_errors ,
747+ codecs .lookup_error ("namereplace" )
748+ )
654749
655750 def test_unencodablereplacement (self ):
656751 def unencrepl (exc ):
@@ -804,7 +899,8 @@ def badencodereturn2(exc):
804899 class D (dict ):
805900 def __getitem__ (self , key ):
806901 raise ValueError
807- for err in ("strict" , "replace" , "xmlcharrefreplace" , "backslashreplace" , "test.posreturn" ):
902+ for err in ("strict" , "replace" , "xmlcharrefreplace" ,
903+ "backslashreplace" , "namereplace" , "test.posreturn" ):
808904 self .assertRaises (UnicodeError , codecs .charmap_encode , "\xff " , err , {0xff : None })
809905 self .assertRaises (ValueError , codecs .charmap_encode , "\xff " , err , D ())
810906 self .assertRaises (TypeError , codecs .charmap_encode , "\xff " , err , {0xff : 300 })
0 commit comments