Mercurial Repository: p/roundup/code: roundup/dehtml.py comparison

comparison roundup/dehtml.py @ 5997:1700542408f3

flake8 cleanups for dehtml.py. Note: you need to disable the long-line check when running flake8, as there is a test example that requires really long lines of htmlized output.

author	John Rouillard <rouilj@ieee.org>
date	Wed, 25 Dec 2019 20:18:39 -0500
parents	b74f0b50bef1
children	af81e7a4302f

comparison

equal deleted inserted replaced

-:69a35d164a69
+:1700542408f3
 from __future__ import print_function
 from roundup.anypy.strings import u2s, uchr
 class dehtml:
 def __init__(self, converter):
 if converter == "none":
 self.html2text = None
 return
 try:
 if converter == "beautifulsoup":
 # Not as well tested as dehtml.
 from bs4 import BeautifulSoup
 def html2text(html):
 soup = BeautifulSoup(html)
 # kill all script and style elements
 for script in soup(["script", "style"]):
 return u2s(soup.get_text('\n', strip=True))
 self.html2text = html2text
 else:
-raise ImportError # use
+raise ImportError
 except ImportError:
 # use the fallback below if beautiful soup is not installed.
 try:
 # Python 3+.
 from html.parser import HTMLParser
 from html.entities import name2codepoint
-pyver=3
+pyver = 3
 except ImportError:
 # Python 2.
 from HTMLParser import HTMLParser
 from htmlentitydefs import name2codepoint
-pyver=2
+pyver = 2
 class DumbHTMLParser(HTMLParser):
 # class attribute
-text=""
+text = ""
 # internal state variable
 _skip_data = False
 _last_empty = False
 def handle_data(self, data):
-if self._skip_data: # skip data if in script or style block
+if self._skip_data:  # skip data in script or style block
 return
-if ( data.strip() == ""):
+if (data.strip() == ""):
 # reduce multiple blank lines to 1
-if ( self._last_empty ):
+if (self._last_empty):
 return
 else:
 self._last_empty = True
 else:
 self._last_empty = False
-self.text=self.text + data
+self.text = self.text + data
 def handle_starttag(self, tag, attrs):
-if (tag == "p" ):
+if (tag == "p"):
-self.text= self.text + "\n"
+self.text = self.text + "\n"
-if (tag  in ("style", "script")):
+if (tag in ("style", "script")):
 self._skip_data = True
 def handle_endtag(self, tag):
-if (tag  in ("style", "script")):
+if (tag in ("style", "script")):
 self._skip_data = False
 def handle_entityref(self, name):
 if self._skip_data:
 return
 c = uchr(name2codepoint[name])
 try:
-self.text= self.text + c
+self.text = self.text + c
 except UnicodeEncodeError:
 # print a space as a placeholder
-self.text= self.text + ' '
+self.text = self.text + ' '
 def html2text(html):
 if pyver == 3:
 parser = DumbHTMLParser(convert_charrefs=True)
 else:
 parser.close()
 return parser.text
 self.html2text = html2text
 if "__main__" == __name__:
-html='''
+html = '''
 <body>
 <script>
 this must not be in output
 </script>
 <style>
 # trap error seen if N_TOKENS not defined when run.
 html2text = dehtml("beautifulsoup").html2text
 if html2text:
 print(html2text(html))
 except NameError as e:
-print("captured error %s"%e)
+print("captured error %s" % e)
 html2text = dehtml("none").html2text
 if html2text:
 print("FAIL: Error, dehtml(none) is returning a function")
 else:
 print("PASS: dehtml(none) is returning None")

Mercurial > p > roundup > code

comparison roundup/dehtml.py @ 5997:1700542408f3