66import sys
77import unittest
88import warnings
9+ import encodings
910
1011from test import support
1112
@@ -2381,68 +2382,69 @@ def test_buffer_api_usage(self):
23812382 view_decoded = codecs .decode (view , encoding )
23822383 self .assertEqual (view_decoded , data )
23832384
2384- def test_type_error_for_text_input (self ):
2385+ def test_text_to_binary_blacklists_binary_transforms (self ):
23852386 # Check binary -> binary codecs give a good error for str input
23862387 bad_input = "bad input type"
23872388 for encoding in bytes_transform_encodings :
23882389 with self .subTest (encoding = encoding ):
2389- msg = "^encoding with '{}' codec failed" .format (encoding )
2390- with self .assertRaisesRegex (TypeError , msg ) as failure :
2390+ fmt = ( "{!r} is not a text encoding; "
2391+ "use codecs.encode\(\) to handle arbitrary codecs" )
2392+ msg = fmt .format (encoding )
2393+ with self .assertRaisesRegex (LookupError , msg ) as failure :
23912394 bad_input .encode (encoding )
2392- self .assertTrue (isinstance (failure .exception .__cause__ ,
2393- TypeError ))
2395+ self .assertIsNone (failure .exception .__cause__ )
2396+
2397+ def test_text_to_binary_blacklists_text_transforms (self ):
2398+ # Check str.encode gives a good error message for str -> str codecs
2399+ msg = (r"^'rot_13' is not a text encoding; "
2400+ "use codecs.encode\(\) to handle arbitrary codecs" )
2401+ with self .assertRaisesRegex (LookupError , msg ):
2402+ "just an example message" .encode ("rot_13" )
23942403
2395- def test_type_error_for_binary_input (self ):
2404+ def test_binary_to_text_blacklists_binary_transforms (self ):
2405+ # Check bytes.decode and bytearray.decode give a good error
2406+ # message for binary -> binary codecs
2407+ data = b"encode first to ensure we meet any format restrictions"
2408+ for encoding in bytes_transform_encodings :
2409+ with self .subTest (encoding = encoding ):
2410+ encoded_data = codecs .encode (data , encoding )
2411+ fmt = (r"{!r} is not a text encoding; "
2412+ "use codecs.decode\(\) to handle arbitrary codecs" )
2413+ msg = fmt .format (encoding )
2414+ with self .assertRaisesRegex (LookupError , msg ):
2415+ encoded_data .decode (encoding )
2416+ with self .assertRaisesRegex (LookupError , msg ):
2417+ bytearray (encoded_data ).decode (encoding )
2418+
2419+ def test_binary_to_text_blacklists_text_transforms (self ):
23962420 # Check str -> str codec gives a good error for binary input
23972421 for bad_input in (b"immutable" , bytearray (b"mutable" )):
23982422 with self .subTest (bad_input = bad_input ):
2399- msg = "^decoding with 'rot_13' codec failed"
2400- with self .assertRaisesRegex (AttributeError , msg ) as failure :
2423+ msg = (r"^'rot_13' is not a text encoding; "
2424+ "use codecs.decode\(\) to handle arbitrary codecs" )
2425+ with self .assertRaisesRegex (LookupError , msg ) as failure :
24012426 bad_input .decode ("rot_13" )
2402- self .assertTrue (isinstance (failure .exception .__cause__ ,
2403- AttributeError ))
2427+ self .assertIsNone (failure .exception .__cause__ )
24042428
24052429 def test_custom_zlib_error_is_wrapped (self ):
24062430 # Check zlib codec gives a good error for malformed input
24072431 msg = "^decoding with 'zlib_codec' codec failed"
24082432 with self .assertRaisesRegex (Exception , msg ) as failure :
2409- b"hello" . decode ( "zlib_codec" )
2410- self .assertTrue ( isinstance (failure .exception .__cause__ ,
2411- type (failure .exception ) ))
2433+ codecs . decode ( b"hello" , "zlib_codec" )
2434+ self .assertIsInstance (failure .exception .__cause__ ,
2435+ type (failure .exception ))
24122436
24132437 def test_custom_hex_error_is_wrapped (self ):
24142438 # Check hex codec gives a good error for malformed input
24152439 msg = "^decoding with 'hex_codec' codec failed"
24162440 with self .assertRaisesRegex (Exception , msg ) as failure :
2417- b"hello" . decode ( "hex_codec" )
2418- self .assertTrue ( isinstance (failure .exception .__cause__ ,
2419- type (failure .exception ) ))
2441+ codecs . decode ( b"hello" , "hex_codec" )
2442+ self .assertIsInstance (failure .exception .__cause__ ,
2443+ type (failure .exception ))
24202444
24212445 # Unfortunately, the bz2 module throws OSError, which the codec
24222446 # machinery currently can't wrap :(
24232447
2424- def test_bad_decoding_output_type (self ):
2425- # Check bytes.decode and bytearray.decode give a good error
2426- # message for binary -> binary codecs
2427- data = b"encode first to ensure we meet any format restrictions"
2428- for encoding in bytes_transform_encodings :
2429- with self .subTest (encoding = encoding ):
2430- encoded_data = codecs .encode (data , encoding )
2431- fmt = ("'{}' decoder returned 'bytes' instead of 'str'; "
2432- "use codecs.decode\(\) to decode to arbitrary types" )
2433- msg = fmt .format (encoding )
2434- with self .assertRaisesRegex (TypeError , msg ):
2435- encoded_data .decode (encoding )
2436- with self .assertRaisesRegex (TypeError , msg ):
2437- bytearray (encoded_data ).decode (encoding )
2438-
2439- def test_bad_encoding_output_type (self ):
2440- # Check str.encode gives a good error message for str -> str codecs
2441- msg = ("'rot_13' encoder returned 'str' instead of 'bytes'; "
2442- "use codecs.encode\(\) to encode to arbitrary types" )
2443- with self .assertRaisesRegex (TypeError , msg ):
2444- "just an example message" .encode ("rot_13" )
2445-
24462448
24472449# The codec system tries to wrap exceptions in order to ensure the error
24482450# mentions the operation being performed and the codec involved. We
@@ -2466,27 +2468,44 @@ def setUp(self):
24662468 # case finishes by using the test case repr as the codec name
24672469 # The codecs module normalizes codec names, although this doesn't
24682470 # appear to be formally documented...
2469- self .codec_name = repr (self ).lower ().replace (" " , "-" )
2471+ # We also make sure we use a truly unique id for the custom codec
2472+ # to avoid issues with the codec cache when running these tests
2473+ # multiple times (e.g. when hunting for refleaks)
2474+ unique_id = repr (self ) + str (id (self ))
2475+ self .codec_name = encodings .normalize_encoding (unique_id ).lower ()
2476+
2477+ # We store the object to raise on the instance because of a bad
2478+ # interaction between the codec caching (which means we can't
2479+ # recreate the codec entry) and regrtest refleak hunting (which
2480+ # runs the same test instance multiple times). This means we
2481+ # need to ensure the codecs call back in to the instance to find
2482+ # out which exception to raise rather than binding them in a
2483+ # closure to an object that may change on the next run
2484+ self .obj_to_raise = RuntimeError
24702485
24712486 def tearDown (self ):
24722487 _TEST_CODECS .pop (self .codec_name , None )
24732488
2474- def set_codec (self , obj_to_raise ):
2475- def raise_obj (* args , ** kwds ):
2476- raise obj_to_raise
2477- codec_info = codecs .CodecInfo (raise_obj , raise_obj ,
2489+ def set_codec (self , encode , decode ):
2490+ codec_info = codecs .CodecInfo (encode , decode ,
24782491 name = self .codec_name )
24792492 _TEST_CODECS [self .codec_name ] = codec_info
24802493
24812494 @contextlib .contextmanager
24822495 def assertWrapped (self , operation , exc_type , msg ):
2483- full_msg = "{} with '{}' codec failed \({}: {}\)" .format (
2496+ full_msg = r "{} with {!r} codec failed \({}: {}\)" .format (
24842497 operation , self .codec_name , exc_type .__name__ , msg )
24852498 with self .assertRaisesRegex (exc_type , full_msg ) as caught :
24862499 yield caught
2500+ self .assertIsInstance (caught .exception .__cause__ , exc_type )
2501+
2502+ def raise_obj (self , * args , ** kwds ):
2503+ # Helper to dynamically change the object raised by a test codec
2504+ raise self .obj_to_raise
24872505
24882506 def check_wrapped (self , obj_to_raise , msg , exc_type = RuntimeError ):
2489- self .set_codec (obj_to_raise )
2507+ self .obj_to_raise = obj_to_raise
2508+ self .set_codec (self .raise_obj , self .raise_obj )
24902509 with self .assertWrapped ("encoding" , exc_type , msg ):
24912510 "str_input" .encode (self .codec_name )
24922511 with self .assertWrapped ("encoding" , exc_type , msg ):
@@ -2515,23 +2534,17 @@ class MyRuntimeError(RuntimeError):
25152534 pass
25162535 self .check_wrapped (MyRuntimeError (msg ), msg , MyRuntimeError )
25172536
2518- @contextlib .contextmanager
2519- def assertNotWrapped (self , operation , exc_type , msg_re , msg = None ):
2520- if msg is None :
2521- msg = msg_re
2522- with self .assertRaisesRegex (exc_type , msg ) as caught :
2523- yield caught
2524- self .assertEqual (str (caught .exception ), msg )
2525-
2526- def check_not_wrapped (self , obj_to_raise , msg_re , msg = None ):
2527- self .set_codec (obj_to_raise )
2528- with self .assertNotWrapped ("encoding" , RuntimeError , msg_re , msg ):
2537+ def check_not_wrapped (self , obj_to_raise , msg ):
2538+ def raise_obj (* args , ** kwds ):
2539+ raise obj_to_raise
2540+ self .set_codec (raise_obj , raise_obj )
2541+ with self .assertRaisesRegex (RuntimeError , msg ):
25292542 "str input" .encode (self .codec_name )
2530- with self .assertNotWrapped ( "encoding" , RuntimeError , msg_re , msg ):
2543+ with self .assertRaisesRegex ( RuntimeError , msg ):
25312544 codecs .encode ("str input" , self .codec_name )
2532- with self .assertNotWrapped ( "decoding" , RuntimeError , msg_re , msg ):
2545+ with self .assertRaisesRegex ( RuntimeError , msg ):
25332546 b"bytes input" .decode (self .codec_name )
2534- with self .assertNotWrapped ( "decoding" , RuntimeError , msg_re , msg ):
2547+ with self .assertRaisesRegex ( RuntimeError , msg ):
25352548 codecs .decode (b"bytes input" , self .codec_name )
25362549
25372550 def test_init_override_is_not_wrapped (self ):
@@ -2550,29 +2563,56 @@ def test_instance_attribute_is_not_wrapped(self):
25502563 msg = "This should NOT be wrapped"
25512564 exc = RuntimeError (msg )
25522565 exc .attr = 1
2553- self .check_not_wrapped (exc , msg )
2566+ self .check_not_wrapped (exc , "^{}$" . format ( msg ) )
25542567
25552568 def test_non_str_arg_is_not_wrapped (self ):
25562569 self .check_not_wrapped (RuntimeError (1 ), "1" )
25572570
25582571 def test_multiple_args_is_not_wrapped (self ):
2559- msg_re = "\('a', 'b', 'c'\)"
2560- msg = "('a', 'b', 'c')"
2561- self .check_not_wrapped (RuntimeError ('a' , 'b' , 'c' ), msg_re , msg )
2572+ msg_re = r"^\('a', 'b', 'c'\)$"
2573+ self .check_not_wrapped (RuntimeError ('a' , 'b' , 'c' ), msg_re )
25622574
25632575 # http://bugs.python.org/issue19609
25642576 def test_codec_lookup_failure_not_wrapped (self ):
2565- msg = "unknown encoding: %s" % self .codec_name
2577+ msg = "^ unknown encoding: {}$" . format ( self .codec_name )
25662578 # The initial codec lookup should not be wrapped
2567- with self .assertNotWrapped ( "encoding" , LookupError , msg ):
2579+ with self .assertRaisesRegex ( LookupError , msg ):
25682580 "str input" .encode (self .codec_name )
2569- with self .assertNotWrapped ( "encoding" , LookupError , msg ):
2581+ with self .assertRaisesRegex ( LookupError , msg ):
25702582 codecs .encode ("str input" , self .codec_name )
2571- with self .assertNotWrapped ( "decoding" , LookupError , msg ):
2583+ with self .assertRaisesRegex ( LookupError , msg ):
25722584 b"bytes input" .decode (self .codec_name )
2573- with self .assertNotWrapped ( "decoding" , LookupError , msg ):
2585+ with self .assertRaisesRegex ( LookupError , msg ):
25742586 codecs .decode (b"bytes input" , self .codec_name )
25752587
2588+ def test_unflagged_non_text_codec_handling (self ):
2589+ # The stdlib non-text codecs are now marked so they're
2590+ # pre-emptively skipped by the text model related methods
2591+ # However, third party codecs won't be flagged, so we still make
2592+ # sure the case where an inappropriate output type is produced is
2593+ # handled appropriately
2594+ def encode_to_str (* args , ** kwds ):
2595+ return "not bytes!" , 0
2596+ def decode_to_bytes (* args , ** kwds ):
2597+ return b"not str!" , 0
2598+ self .set_codec (encode_to_str , decode_to_bytes )
2599+ # No input or output type checks on the codecs module functions
2600+ encoded = codecs .encode (None , self .codec_name )
2601+ self .assertEqual (encoded , "not bytes!" )
2602+ decoded = codecs .decode (None , self .codec_name )
2603+ self .assertEqual (decoded , b"not str!" )
2604+ # Text model methods should complain
2605+ fmt = (r"^{!r} encoder returned 'str' instead of 'bytes'; "
2606+ "use codecs.encode\(\) to encode to arbitrary types$" )
2607+ msg = fmt .format (self .codec_name )
2608+ with self .assertRaisesRegex (TypeError , msg ):
2609+ "str_input" .encode (self .codec_name )
2610+ fmt = (r"^{!r} decoder returned 'bytes' instead of 'str'; "
2611+ "use codecs.decode\(\) to decode to arbitrary types$" )
2612+ msg = fmt .format (self .codec_name )
2613+ with self .assertRaisesRegex (TypeError , msg ):
2614+ b"bytes input" .decode (self .codec_name )
2615+
25762616
25772617
25782618@unittest .skipUnless (sys .platform == 'win32' ,
0 commit comments