@@ -1867,37 +1867,35 @@ def getFileItems(filename, commentPrefix='#', unicode_=True, lowercase=False, un
18671867 Returns newline delimited items contained inside file
18681868 """
18691869
1870- retVal = []
1870+ retVal = list () if not unique else set ()
18711871
18721872 checkFile (filename )
18731873
1874- if unicode_ :
1875- f = codecs .open (filename , 'r' , UNICODE_ENCODING )
1876- else :
1877- f = open (filename , 'r' )
1878-
1879- for line in f .readlines (): # xreadlines doesn't return unicode strings when codec.open() is used
1880- if commentPrefix :
1881- if line .find (commentPrefix ) != - 1 :
1882- line = line [:line .find (commentPrefix )]
1874+ with codecs .open (filename , 'r' , UNICODE_ENCODING ) if unicode_ else open (filename , 'r' ) as f :
1875+ for line in (f .readlines () if unicode_ else f .xreadlines ()): # xreadlines doesn't return unicode strings when codec.open() is used
1876+ if commentPrefix :
1877+ if line .find (commentPrefix ) != - 1 :
1878+ line = line [:line .find (commentPrefix )]
18831879
1884- line = line .strip ()
1880+ line = line .strip ()
18851881
1886- if not unicode_ :
1887- try :
1888- line = str .encode (line )
1889- except UnicodeDecodeError :
1890- continue
1891- if line :
1892- if lowercase :
1893- line = line .lower ()
1882+ if not unicode_ :
1883+ try :
1884+ line = str .encode (line )
1885+ except UnicodeDecodeError :
1886+ continue
18941887
1895- if unique and line in retVal :
1896- continue
1888+ if line :
1889+ if lowercase :
1890+ line = line .lower ()
18971891
1898- retVal .append (line )
1892+ if unique and line in retVal :
1893+ continue
18991894
1900- f .close ()
1895+ if unique :
1896+ retVal .add (line )
1897+ else :
1898+ retVal .append (line )
19011899
19021900 return retVal
19031901
@@ -3019,8 +3017,11 @@ def asciifyUrl(url, forceQuote=False):
30193017 # apparently not an url
30203018 return url
30213019
3020+ if all (char in string .printable for char in url ):
3021+ return url
3022+
30223023 # idna-encode domain
3023- hostname = parts .hostname .encode (' idna' )
3024+ hostname = parts .hostname .encode (" idna" )
30243025
30253026 # UTF8-quote the other parts. We check each part individually if
30263027 # it needs to be quoted - that should catch some additional user
@@ -3031,14 +3032,14 @@ def quote(s, safe):
30313032 # Triggers on non-ascii characters - another option would be:
30323033 # urllib.quote(s.replace('%', '')) != s.replace('%', '')
30333034 # which would trigger on all %-characters, e.g. "&".
3034- if s .encode (' ascii' , ' replace' ) != s or forceQuote :
3035- return urllib .quote (s .encode (' utf8' ), safe = safe )
3035+ if s .encode (" ascii" , " replace" ) != s or forceQuote :
3036+ return urllib .quote (s .encode (" utf8" ), safe = safe )
30363037 return s
30373038
30383039 username = quote (parts .username , '' )
30393040 password = quote (parts .password , safe = '' )
30403041 path = quote (parts .path , safe = '/' )
3041- query = quote (parts .query , safe = '&=' )
3042+ query = quote (parts .query , safe = "&=" )
30423043
30433044 # put everything back together
30443045 netloc = hostname
@@ -3076,7 +3077,7 @@ def geturl(self):
30763077 warnMsg = "badly formed HTML at the given url ('%s'). Will try to filter it" % url
30773078 logger .warning (warnMsg )
30783079 response .seek (0 )
3079- filtered = _ ("" .join (re .findall (r' <form(?!.+<form).+?</form>' , response .read (), re .I | re .S )), response .geturl ())
3080+ filtered = _ ("" .join (re .findall (r" <form(?!.+<form).+?</form>" , response .read (), re .I | re .S )), response .geturl ())
30803081 try :
30813082 forms = ParseResponse (filtered , backwards_compat = False )
30823083 except ParseError :
@@ -3089,7 +3090,7 @@ def geturl(self):
30893090 if forms :
30903091 for form in forms :
30913092 for control in form .controls :
3092- if hasattr (control , ' items' ):
3093+ if hasattr (control , " items" ):
30933094 # if control has selectable items select first non-disabled
30943095 for item in control .items :
30953096 if not item .disabled :
0 commit comments