1717from encoding import get_encoding
1818from debug import describe , text_content , open_in_browser
1919
20- log = logging .getLogger ('readbility.readability' )
21- StandardError = Exception in python3
20+ log = logging .getLogger (__file__ )
2221
2322REGEXES = {
2423 'unlikelyCandidatesRe' : re .compile ('combx|comment|community|disqus|extra|foot|header|menu|remark|rss|shoutbox|sidebar|sponsor|ad-break|agegate|pagination|pager|popup|tweet|twitter' , re .I ),
@@ -190,7 +189,7 @@ def summary(self, html_partial=False):
190189 continue
191190 else :
192191 return cleaned_article
193- except StandardError as e :
192+ except Exception as e :
194193 log .exception ('error getting summary: ' )
195194 raise Unparseable (str (e )), None , sys .exc_info ()[2 ]
196195
@@ -635,9 +634,8 @@ def main():
635634 file = urllib .urlopen (options .url )
636635 else :
637636 file = open (args [0 ], 'rt' )
638- output_encoding = sys .__stdout__ .encoding or 'utf-8'
639- # XXX: a hack, better set PYTHONIOENCODING explicitly
640637 html = file .read () # bytes object
638+
641639 encoding = get_encoding (html )
642640 html = html .decode (encoding )
643641 try :
@@ -648,6 +646,9 @@ def main():
648646 result = 'Title: ' + doc .short_title () + '<br/>' + doc .summary ()
649647 open_in_browser (result )
650648 else :
649+ # XXX: a hack, better to set PYTHONIOENCODING explicitly
650+ output_encoding = sys .__stdout__ .encoding or 'utf-8'
651+
651652 print 'Title:' , doc .short_title ().encode (output_encoding , 'replace' )
652653 print doc .summary ().encode (output_encoding , 'replace' )
653654 finally :
0 commit comments