File tree (expand / collapse): 2 files changed, +4 -14 lines changed
File tree (expand / collapse): 2 files changed, +4 -14 lines changed
Original file line number | Diff line number | Diff line change
@@ -11,20 +11,10 @@ def build_doc(page):
1111 if isinstance (page , unicode ):
1212 page_unicode = page
1313 else :
14- enc = get_encoding (page )
15- if enc :
16- page_unicode = page .decode (enc , 'replace' )
17- encoding = enc
18- else :
19- try :
20- #try utf-8
21- page_unicode = page .decode ('utf-8' , 'strict' )
22- encoding = 'utf-8'
23- except UnicodeDecodeError :
24- page_unicode = page .decode ('utf-8' , 'replace' )
25- encoding = 'utf-8'
14+ enc = get_encoding (page ) or 'utf-8'
15+ page_unicode = page .decode (enc , 'replace' )
2616 doc = lxml .html .document_fromstring (page_unicode .encode ('utf-8' , 'replace' ), parser = utf8_parser )
27- return doc , encoding
17+ return doc , enc
2818
2919def js_re (src , pattern , flags , repl ):
3020 return re .compile (pattern , flags ).sub (src , repl .replace ('$' , '\\ ' ))
Original file line number Diff line number Diff line change 99
1010setup (
1111 name = "readability-lxml" ,
12- version = "0.3" ,
12+ version = "0.3.0.1" ,
1313 author = "Yuri Baburov" ,
1414 author_email = "burchik@gmail.com" ,
1515 description = "fast python port of arc90's readability tool" ,
You can’t perform that action at this time.
0 commit comments