annotate roundup/dehtml.py @ 5676:e70885fe72a4

issue2551026: template variable not defined even though it is. Fix issue where variables defined in TAL expression are not available in the scope of the definition. (Tom Ekberg (tekberg))
author John Rouillard <rouilj@ieee.org>
date Tue, 26 Mar 2019 17:31:28 -0400
parents c749d6795bc2
children b74f0b50bef1
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
1
5376
64b05e24dbd8 Python 3 preparation: convert print to a function.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5305
diff changeset
2 from __future__ import print_function
5417
c749d6795bc2 Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5416
diff changeset
3 from roundup.anypy.strings import u2s, uchr
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
class dehtml:
    """Select an HTML-to-plain-text converter by name.

    After construction ``self.html2text`` is either ``None`` (converter
    "none") or a function that takes an HTML string and returns the
    text extracted from it.
    """

    def __init__(self, converter):
        """Bind ``self.html2text`` according to ``converter``.

        converter -- "none" disables conversion (``html2text`` is None);
                     "beautifulsoup" uses bs4 when it can be imported;
                     any other value, or a failed bs4 import, selects
                     the built-in HTMLParser based fallback.
        """
        if converter == "none":
            self.html2text = None
            return

        try:
            if converter == "beautifulsoup":
                # Not as well tested as dehtml.
                from bs4 import BeautifulSoup

                def html2text(html):
                    """Return the text of html with script/style removed."""
                    # Name the parser explicitly so the result does not
                    # depend on which optional parser libraries happen
                    # to be installed (and bs4 does not warn about it).
                    soup = BeautifulSoup(html, "html.parser")

                    # kill all script and style elements
                    for script in soup(["script", "style"]):
                        script.extract()

                    return u2s(soup.get_text('\n', strip=True))

                self.html2text = html2text
            else:
                # Not an import failure: reuse the handler below to
                # select the fallback converter.
                raise ImportError
        except ImportError:
            # use the fallback below if beautiful soup is not installed.
            try:
                # Python 3+.
                from html.parser import HTMLParser
                from html.entities import name2codepoint
            except ImportError:
                # Python 2.
                from HTMLParser import HTMLParser
                from htmlentitydefs import name2codepoint

            class DumbHTMLParser(HTMLParser):
                """Accumulate the text content of a document in ``text``."""

                # class attribute; instances rebind it as text is added
                text = ""

                # internal state variables
                _skip_data = False    # True while inside <script>/<style>
                _last_empty = False   # True if last data was whitespace

                def _append(self, chunk):
                    """Append chunk to the text, or a space if it can't be."""
                    try:
                        self.text = self.text + chunk
                    except UnicodeError:
                        # use a space as a placeholder for a character
                        # that can not be combined with the text so far
                        self.text = self.text + ' '

                def handle_data(self, data):
                    if self._skip_data:  # skip data if in script or style block
                        return

                    if data.strip() == "":
                        # reduce multiple blank lines to 1
                        if self._last_empty:
                            return
                        self._last_empty = True
                    else:
                        self._last_empty = False

                    self.text = self.text + data

                def handle_starttag(self, tag, attrs):
                    if tag == "p":
                        self.text = self.text + "\n"
                    if tag in ("style", "script"):
                        self._skip_data = True

                def handle_endtag(self, tag):
                    if tag in ("style", "script"):
                        self._skip_data = False

                def handle_entityref(self, name):
                    """Add the character for a named reference (&name;)."""
                    if self._skip_data:
                        return
                    try:
                        c = uchr(name2codepoint[name])
                    except KeyError:
                        # Unknown entity name: keep the reference text
                        # instead of aborting the whole conversion.
                        c = "&" + name + ";"
                    self._append(c)

                def handle_charref(self, name):
                    """Add the character for a numeric reference (&#N;).

                    Only called when the parser does not convert
                    character references itself; the base class
                    implementation would silently drop them.
                    """
                    if self._skip_data:
                        return
                    try:
                        if name[:1] in ("x", "X"):
                            c = uchr(int(name[1:], 16))
                        else:
                            c = uchr(int(name))
                    except (ValueError, OverflowError):
                        # Malformed or out of range: keep the source text.
                        c = "&#" + name + ";"
                    self._append(c)

            def html2text(html):
                """Return the text collected by DumbHTMLParser from html."""
                parser = DumbHTMLParser()
                parser.feed(html)
                parser.close()
                return parser.text

            self.html2text = html2text
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
87
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
if __name__ == "__main__":
    # Manual smoke test: run a sample page through each converter and
    # print what comes out.
    sample_html = '''
<body>
<script>
this must not be in output
</script>
<style>
p {display:block}
</style>
<div class="header"><h1>Roundup</h1>
<div id="searchbox" style="display: none">
<form class="search" action="../search.html" method="get">
<input type="text" name="q" size="18" />
<input type="submit" value="Search" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
<script type="text/javascript">$('#searchbox').show(0);</script>
</div>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../index.html">Home</a></li>
<li class="toctree-l1"><a class="reference external" href="http://pypi.python.org/pypi/roundup">Download</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="../docs.html">Docs</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="features.html">Roundup Features</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="">Installing Roundup</a></li>
<li class="toctree-l2"><a class="reference internal" href="upgrading.html">Upgrading to newer versions of Roundup</a></li>
<li class="toctree-l2"><a class="reference internal" href="FAQ.html">Roundup FAQ</a></li>
<li class="toctree-l2"><a class="reference internal" href="user_guide.html">User Guide</a></li>
<li class="toctree-l2"><a class="reference internal" href="customizing.html">Customising Roundup</a></li>
<li class="toctree-l2"><a class="reference internal" href="admin_guide.html">Administration Guide</a></li>
</ul>
<div class="section" id="prerequisites">
<h2><a class="toc-backref" href="#id5">Prerequisites</a></h2>
<p>Roundup requires Python 2.5 or newer (but not Python 3) with a functioning
anydbm module. Download the latest version from <a class="reference external" href="http://www.python.org/">http://www.python.org/</a>.
It is highly recommended that users install the latest patch version
of python as these contain many fixes to serious bugs.</p>
<p>Some variants of Linux will need an additional &#8220;python dev&#8221; package
installed for Roundup installation to work. Debian and derivatives, are
known to require this.</p>
<p>If you&#8217;re on windows, you will either need to be using the ActiveState python
distribution (at <a class="reference external" href="http://www.activestate.com/Products/ActivePython/">http://www.activestate.com/Products/ActivePython/</a>), or you&#8217;ll
have to install the win32all package separately (get it from
<a class="reference external" href="http://starship.python.net/crew/mhammond/win32/">http://starship.python.net/crew/mhammond/win32/</a>).</p>
</div>
</body>
'''

    # The "dehtml" converter name.
    convert = dehtml("dehtml").html2text
    if convert:
        print(convert(sample_html))

    # The BeautifulSoup based converter.
    try:
        # trap error seen if N_TOKENS not defined when run.
        convert = dehtml("beautifulsoup").html2text
        if convert:
            print(convert(sample_html))
    except NameError as err:
        print("captured error %s" % err)

    # "none" must not hand back a converter function at all.
    convert = dehtml("none").html2text
    if not convert:
        print("PASS: dehtml(none) is returning None")
    else:
        print("FAIL: Error, dehtml(none) is returning a function")
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
154
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
155

Roundup Issue Tracker: http://roundup-tracker.org/