annotate roundup/dehtml.py @ 5937:5d0873a4de4a

fix rate limit headers - were ints/floats need to be strings Running under gunicorn rest requests were crashing. Not all of the values for the rate limit headers were strings. Some were numbers. This caused the header generation for wsgi to fail. Now the values are all strings.
author John Rouillard <rouilj@ieee.org>
date Sun, 20 Oct 2019 20:56:56 -0400
parents b74f0b50bef1
children 1700542408f3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
1
5376
64b05e24dbd8 Python 3 preparation: convert print to a function.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5305
diff changeset
2 from __future__ import print_function
5417
c749d6795bc2 Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5416
diff changeset
3 from roundup.anypy.strings import u2s, uchr
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
4 class dehtml:
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
5 def __init__(self, converter):
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
6 if converter == "none":
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
7 self.html2text = None
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
8 return
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
9
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
10 try:
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
11 if converter == "beautifulsoup":
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
12 # Not as well tested as dehtml.
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
13 from bs4 import BeautifulSoup
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
14 def html2text(html):
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
15 soup = BeautifulSoup(html)
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
16
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
17 # kill all script and style elements
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
18 for script in soup(["script", "style"]):
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
19 script.extract()
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
20
5416
56c9bcdea47f Python 3 preparation: unicode.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5411
diff changeset
21 return u2s(soup.get_text('\n', strip=True))
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
22
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
23 self.html2text = html2text
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
24 else:
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
25 raise ImportError # use
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
26 except ImportError:
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
27 # use the fallback below if beautiful soup is not installed.
5411
9c6d98bf79db Python 3 preparation: update HTMLParser / htmlentitydefs imports.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5376
diff changeset
28 try:
9c6d98bf79db Python 3 preparation: update HTMLParser / htmlentitydefs imports.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5376
diff changeset
29 # Python 3+.
9c6d98bf79db Python 3 preparation: update HTMLParser / htmlentitydefs imports.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5376
diff changeset
30 from html.parser import HTMLParser
9c6d98bf79db Python 3 preparation: update HTMLParser / htmlentitydefs imports.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5376
diff changeset
31 from html.entities import name2codepoint
5838
b74f0b50bef1 Fix CI deprication warning for HTMLParser convert_charrefs under py3.
John Rouillard <rouilj@ieee.org>
parents: 5417
diff changeset
32 pyver=3
5411
9c6d98bf79db Python 3 preparation: update HTMLParser / htmlentitydefs imports.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5376
diff changeset
33 except ImportError:
9c6d98bf79db Python 3 preparation: update HTMLParser / htmlentitydefs imports.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5376
diff changeset
34 # Python 2.
9c6d98bf79db Python 3 preparation: update HTMLParser / htmlentitydefs imports.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5376
diff changeset
35 from HTMLParser import HTMLParser
9c6d98bf79db Python 3 preparation: update HTMLParser / htmlentitydefs imports.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5376
diff changeset
36 from htmlentitydefs import name2codepoint
5838
b74f0b50bef1 Fix CI deprication warning for HTMLParser convert_charrefs under py3.
John Rouillard <rouilj@ieee.org>
parents: 5417
diff changeset
37 pyver=2
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
38
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
39 class DumbHTMLParser(HTMLParser):
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
40 # class attribute
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
41 text=""
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
42
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
43 # internal state variable
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
44 _skip_data = False
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
45 _last_empty = False
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
46
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
47 def handle_data(self, data):
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
48 if self._skip_data: # skip data if in script or style block
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
49 return
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
50
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
51 if ( data.strip() == ""):
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
52 # reduce multiple blank lines to 1
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
53 if ( self._last_empty ):
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
54 return
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
55 else:
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
56 self._last_empty = True
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
57 else:
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
58 self._last_empty = False
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
59
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
60 self.text=self.text + data
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
61
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
62 def handle_starttag(self, tag, attrs):
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
63 if (tag == "p" ):
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
64 self.text= self.text + "\n"
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
65 if (tag in ("style", "script")):
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
66 self._skip_data = True
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
67
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
68 def handle_endtag(self, tag):
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
69 if (tag in ("style", "script")):
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
70 self._skip_data = False
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
71
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
72 def handle_entityref(self, name):
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
73 if self._skip_data:
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
74 return
5417
c749d6795bc2 Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5416
diff changeset
75 c = uchr(name2codepoint[name])
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
76 try:
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
77 self.text= self.text + c
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
78 except UnicodeEncodeError:
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
79 # print a space as a placeholder
5838
b74f0b50bef1 Fix CI deprication warning for HTMLParser convert_charrefs under py3.
John Rouillard <rouilj@ieee.org>
parents: 5417
diff changeset
80 self.text= self.text + ' '
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
81
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
82 def html2text(html):
5838
b74f0b50bef1 Fix CI deprication warning for HTMLParser convert_charrefs under py3.
John Rouillard <rouilj@ieee.org>
parents: 5417
diff changeset
83 if pyver == 3:
b74f0b50bef1 Fix CI deprication warning for HTMLParser convert_charrefs under py3.
John Rouillard <rouilj@ieee.org>
parents: 5417
diff changeset
84 parser = DumbHTMLParser(convert_charrefs=True)
b74f0b50bef1 Fix CI deprication warning for HTMLParser convert_charrefs under py3.
John Rouillard <rouilj@ieee.org>
parents: 5417
diff changeset
85 else:
b74f0b50bef1 Fix CI deprication warning for HTMLParser convert_charrefs under py3.
John Rouillard <rouilj@ieee.org>
parents: 5417
diff changeset
86 parser = DumbHTMLParser()
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
87 parser.feed(html)
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
88 parser.close()
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
89 return parser.text
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
90
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
91 self.html2text = html2text
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
92
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
93 if "__main__" == __name__:
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
94 html='''
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
95 <body>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
96 <script>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
97 this must not be in output
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
98 </script>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
99 <style>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
100 p {display:block}
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
101 </style>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
102 <div class="header"><h1>Roundup</h1>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
103 <div id="searchbox" style="display: none">
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
104 <form class="search" action="../search.html" method="get">
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
105 <input type="text" name="q" size="18" />
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
106 <input type="submit" value="Search" />
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
107 <input type="hidden" name="check_keywords" value="yes" />
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
108 <input type="hidden" name="area" value="default" />
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
109 </form>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
110 </div>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
111 <script type="text/javascript">$('#searchbox').show(0);</script>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
112 </div>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
113 <ul class="current">
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
114 <li class="toctree-l1"><a class="reference internal" href="../index.html">Home</a></li>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
115 <li class="toctree-l1"><a class="reference external" href="http://pypi.python.org/pypi/roundup">Download</a></li>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
116 <li class="toctree-l1 current"><a class="reference internal" href="../docs.html">Docs</a><ul class="current">
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
117 <li class="toctree-l2"><a class="reference internal" href="features.html">Roundup Features</a></li>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
118 <li class="toctree-l2 current"><a class="current reference internal" href="">Installing Roundup</a></li>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
119 <li class="toctree-l2"><a class="reference internal" href="upgrading.html">Upgrading to newer versions of Roundup</a></li>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
120 <li class="toctree-l2"><a class="reference internal" href="FAQ.html">Roundup FAQ</a></li>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
121 <li class="toctree-l2"><a class="reference internal" href="user_guide.html">User Guide</a></li>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
122 <li class="toctree-l2"><a class="reference internal" href="customizing.html">Customising Roundup</a></li>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
123 <li class="toctree-l2"><a class="reference internal" href="admin_guide.html">Administration Guide</a></li>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
124 </ul>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
125 <div class="section" id="prerequisites">
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
126 <h2><a class="toc-backref" href="#id5">Prerequisites</a></h2>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
127 <p>Roundup requires Python 2.5 or newer (but not Python 3) with a functioning
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
128 anydbm module. Download the latest version from <a class="reference external" href="http://www.python.org/">http://www.python.org/</a>.
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
129 It is highly recommended that users install the latest patch version
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
130 of python as these contain many fixes to serious bugs.</p>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
131 <p>Some variants of Linux will need an additional &#8220;python dev&#8221; package
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
132 installed for Roundup installation to work. Debian and derivatives, are
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
133 known to require this.</p>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
134 <p>If you&#8217;re on windows, you will either need to be using the ActiveState python
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
135 distribution (at <a class="reference external" href="http://www.activestate.com/Products/ActivePython/">http://www.activestate.com/Products/ActivePython/</a>), or you&#8217;ll
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
136 have to install the win32all package separately (get it from
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
137 <a class="reference external" href="http://starship.python.net/crew/mhammond/win32/">http://starship.python.net/crew/mhammond/win32/</a>).</p>
5838
b74f0b50bef1 Fix CI deprication warning for HTMLParser convert_charrefs under py3.
John Rouillard <rouilj@ieee.org>
parents: 5417
diff changeset
138 <script>
b74f0b50bef1 Fix CI deprication warning for HTMLParser convert_charrefs under py3.
John Rouillard <rouilj@ieee.org>
parents: 5417
diff changeset
139 &lt; HELP &GT;
b74f0b50bef1 Fix CI deprication warning for HTMLParser convert_charrefs under py3.
John Rouillard <rouilj@ieee.org>
parents: 5417
diff changeset
140 </script>
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
141 </div>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
142 </body>
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
143 '''
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
144
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
145 html2text = dehtml("dehtml").html2text
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
146 if html2text:
5376
64b05e24dbd8 Python 3 preparation: convert print to a function.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5305
diff changeset
147 print(html2text(html))
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
148
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
149 try:
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
150 # trap error seen if N_TOKENS not defined when run.
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
151 html2text = dehtml("beautifulsoup").html2text
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
152 if html2text:
5376
64b05e24dbd8 Python 3 preparation: convert print to a function.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5305
diff changeset
153 print(html2text(html))
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
154 except NameError as e:
5376
64b05e24dbd8 Python 3 preparation: convert print to a function.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5305
diff changeset
155 print("captured error %s"%e)
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
156
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
157 html2text = dehtml("none").html2text
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
158 if html2text:
5376
64b05e24dbd8 Python 3 preparation: convert print to a function.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5305
diff changeset
159 print("FAIL: Error, dehtml(none) is returning a function")
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
160 else:
5376
64b05e24dbd8 Python 3 preparation: convert print to a function.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5305
diff changeset
161 print("PASS: dehtml(none) is returning None")
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
162
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
163

Roundup Issue Tracker: http://roundup-tracker.org/