103103
104104class UrlParseTestCase (unittest .TestCase ):
105105
106- def checkRoundtrips (self , url , parsed , split ):
106+ def checkRoundtrips (self , url , parsed , split , url2 = None ):
107+ if url2 is None :
108+ url2 = url
107109 result = urllib .parse .urlparse (url )
108110 self .assertEqual (result , parsed )
109111 t = (result .scheme , result .netloc , result .path ,
110112 result .params , result .query , result .fragment )
111113 self .assertEqual (t , parsed )
112114 # put it back together and it should be the same
113115 result2 = urllib .parse .urlunparse (result )
114- self .assertEqual (result2 , url )
116+ self .assertEqual (result2 , url2 )
115117 self .assertEqual (result2 , result .geturl ())
116118
117119 # the result of geturl() is a fixpoint; we can always parse it
@@ -137,7 +139,7 @@ def checkRoundtrips(self, url, parsed, split):
137139 result .query , result .fragment )
138140 self .assertEqual (t , split )
139141 result2 = urllib .parse .urlunsplit (result )
140- self .assertEqual (result2 , url )
142+ self .assertEqual (result2 , url2 )
141143 self .assertEqual (result2 , result .geturl ())
142144
143145 # check the fixpoint property of re-parsing the result of geturl()
@@ -175,9 +177,39 @@ def test_qs(self):
175177
176178 def test_roundtrips (self ):
177179 str_cases = [
180+ ('path/to/file' ,
181+ ('' , '' , 'path/to/file' , '' , '' , '' ),
182+ ('' , '' , 'path/to/file' , '' , '' )),
183+ ('/path/to/file' ,
184+ ('' , '' , '/path/to/file' , '' , '' , '' ),
185+ ('' , '' , '/path/to/file' , '' , '' )),
186+ ('//path/to/file' ,
187+ ('' , 'path' , '/to/file' , '' , '' , '' ),
188+ ('' , 'path' , '/to/file' , '' , '' )),
189+ ('////path/to/file' ,
190+ ('' , '' , '//path/to/file' , '' , '' , '' ),
191+ ('' , '' , '//path/to/file' , '' , '' )),
192+ ('scheme:path/to/file' ,
193+ ('scheme' , '' , 'path/to/file' , '' , '' , '' ),
194+ ('scheme' , '' , 'path/to/file' , '' , '' )),
195+ ('scheme:/path/to/file' ,
196+ ('scheme' , '' , '/path/to/file' , '' , '' , '' ),
197+ ('scheme' , '' , '/path/to/file' , '' , '' )),
198+ ('scheme://path/to/file' ,
199+ ('scheme' , 'path' , '/to/file' , '' , '' , '' ),
200+ ('scheme' , 'path' , '/to/file' , '' , '' )),
201+ ('scheme:////path/to/file' ,
202+ ('scheme' , '' , '//path/to/file' , '' , '' , '' ),
203+ ('scheme' , '' , '//path/to/file' , '' , '' )),
178204 ('file:///tmp/junk.txt' ,
179205 ('file' , '' , '/tmp/junk.txt' , '' , '' , '' ),
180206 ('file' , '' , '/tmp/junk.txt' , '' , '' )),
207+ ('file:////tmp/junk.txt' ,
208+ ('file' , '' , '//tmp/junk.txt' , '' , '' , '' ),
209+ ('file' , '' , '//tmp/junk.txt' , '' , '' )),
210+ ('file://///tmp/junk.txt' ,
211+ ('file' , '' , '///tmp/junk.txt' , '' , '' , '' ),
212+ ('file' , '' , '///tmp/junk.txt' , '' , '' )),
181213 ('imap://mail.python.org/mbox1' ,
182214 ('imap' , 'mail.python.org' , '/mbox1' , '' , '' , '' ),
183215 ('imap' , 'mail.python.org' , '/mbox1' , '' , '' )),
@@ -208,6 +240,38 @@ def _encode(t):
208240 for url , parsed , split in str_cases + bytes_cases :
209241 self .checkRoundtrips (url , parsed , split )
210242
def test_roundtrips_normalization(self):
    """Check round-trips whose reassembled URL differs from the input.

    Each case is ``(url, url2, parsed, split)``:

    * ``url``    -- the text handed to ``checkRoundtrips`` for parsing;
    * ``url2``   -- the text ``checkRoundtrips`` is told to expect when the
      parse result is put back together;
    * ``parsed`` -- the expected 6-tuple;
    * ``split``  -- the expected 5-tuple.

    Every case is exercised twice: once as ``str`` and once with all
    components ASCII-encoded to ``bytes``.
    """
    str_cases = [
        # Inputs with an empty '//' authority marker that is expected to be
        # absent from the reassembled URL.
        ('///path/to/file',
         '/path/to/file',
         ('', '', '/path/to/file', '', '', ''),
         ('', '', '/path/to/file', '', '')),
        ('scheme:///path/to/file',
         'scheme:/path/to/file',
         ('scheme', '', '/path/to/file', '', '', ''),
         ('scheme', '', '/path/to/file', '', '')),
        # Inputs without '//' that are expected to come back with an empty
        # '//' authority marker for the file, http and https schemes.
        ('file:/tmp/junk.txt',
         'file:///tmp/junk.txt',
         ('file', '', '/tmp/junk.txt', '', '', ''),
         ('file', '', '/tmp/junk.txt', '', '')),
        ('http:/tmp/junk.txt',
         'http:///tmp/junk.txt',
         ('http', '', '/tmp/junk.txt', '', '', ''),
         ('http', '', '/tmp/junk.txt', '', '')),
        ('https:/tmp/junk.txt',
         'https:///tmp/junk.txt',
         ('https', '', '/tmp/junk.txt', '', '', ''),
         ('https', '', '/tmp/junk.txt', '', '')),
    ]

    def _encode(t):
        # Build the bytes twin of a str case: both URLs and every tuple
        # component are encoded as ASCII.
        return (t[0].encode('ascii'),
                t[1].encode('ascii'),
                tuple(x.encode('ascii') for x in t[2]),
                tuple(x.encode('ascii') for x in t[3]))

    bytes_cases = [_encode(x) for x in str_cases]
    for url, url2, parsed, split in str_cases + bytes_cases:
        # subTest keeps the remaining cases running after a failure and
        # tags each failure with the URL that produced it.
        with self.subTest(url=url):
            self.checkRoundtrips(url, parsed, split, url2)
274+
211275 def test_http_roundtrips (self ):
212276 # urllib.parse.urlsplit treats 'http:' as an optimized special case,
213277 # so we test both 'http:' and 'https:' in all the following.
0 commit comments