@@ -612,6 +612,54 @@ def test_urlsplit_attributes(self):
612612 with self .assertRaisesRegex (ValueError , "out of range" ):
613613 p .port
614614
615+ def test_urlsplit_remove_unsafe_bytes (self ):
616+ # Remove ASCII tabs and newlines from input, for http common case scenario.
617+ url = "h\n ttp://www.python\n .org\t /java\n script:\t alert('msg\r \n ')/?query\n =\t something#frag\n ment"
618+ p = urllib .parse .urlsplit (url )
619+ self .assertEqual (p .scheme , "http" )
620+ self .assertEqual (p .netloc , "www.python.org" )
621+ self .assertEqual (p .path , "/javascript:alert('msg')/" )
622+ self .assertEqual (p .query , "query=something" )
623+ self .assertEqual (p .fragment , "fragment" )
624+ self .assertEqual (p .username , None )
625+ self .assertEqual (p .password , None )
626+ self .assertEqual (p .hostname , "www.python.org" )
627+ self .assertEqual (p .port , None )
628+ self .assertEqual (p .geturl (), "http://www.python.org/javascript:alert('msg')/?query=something#fragment" )
629+
630+ # Remove ASCII tabs and newlines from input as bytes, for http common case scenario.
631+ url = b"h\n ttp://www.python\n .org\t /java\n script:\t alert('msg\r \n ')/?query\n =\t something#frag\n ment"
632+ p = urllib .parse .urlsplit (url )
633+ self .assertEqual (p .scheme , b"http" )
634+ self .assertEqual (p .netloc , b"www.python.org" )
635+ self .assertEqual (p .path , b"/javascript:alert('msg')/" )
636+ self .assertEqual (p .query , b"query=something" )
637+ self .assertEqual (p .fragment , b"fragment" )
638+ self .assertEqual (p .username , None )
639+ self .assertEqual (p .password , None )
640+ self .assertEqual (p .hostname , b"www.python.org" )
641+ self .assertEqual (p .port , None )
642+ self .assertEqual (p .geturl (), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment" )
643+
644+ # any scheme
645+ url = "x-new-scheme\t ://www.python\n .org\t /java\n script:\t alert('msg\r \n ')/?query\n =\t something#frag\n ment"
646+ p = urllib .parse .urlsplit (url )
647+ self .assertEqual (p .geturl (), "x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment" )
648+
649+ # Remove ASCII tabs and newlines from input as bytes, any scheme.
650+ url = b"x-new-scheme\t ://www.python\n .org\t /java\n script:\t alert('msg\r \n ')/?query\n =\t something#frag\n ment"
651+ p = urllib .parse .urlsplit (url )
652+ self .assertEqual (p .geturl (), b"x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment" )
653+
654+ # Unsafe bytes is not returned from urlparse cache.
655+ # scheme is stored after parsing, sending an scheme with unsafe bytes *will not* return an unsafe scheme
656+ url = "https://www.python\n .org\t /java\n script:\t alert('msg\r \n ')/?query\n =\t something#frag\n ment"
657+ scheme = "htt\n ps"
658+ for _ in range (2 ):
659+ p = urllib .parse .urlsplit (url , scheme = scheme )
660+ self .assertEqual (p .scheme , "https" )
661+ self .assertEqual (p .geturl (), "https://www.python.org/javascript:alert('msg')/?query=something#fragment" )
662+
615663 def test_attributes_bad_port (self ):
616664 """Check handling of invalid ports."""
617665 for bytes in (False , True ):
0 commit comments